/*** clean four datasets, get firm level homophily dummy ***/
/*** construct degree of homophily ***/
/*** firm level homo_edu_r  _work_r and _either_r ***/

/*** combine four databases and clean ***/
/* Read the four network extracts (tass, mstar, hfr, barclay) from CSV. */
proc import out=tass replace dbms=CSV
    datafile="D:\research\networking\network_tass.csv";
    getnames=yes;
    guessingrows=5000;
run;

proc import out=mstar replace dbms=CSV
    datafile="D:\research\networking\network_mstar.csv";
    getnames=yes;
    guessingrows=5000;
run;

/* hfr is scanned with a larger guessingrows than the other three files. */
proc import out=hfr replace dbms=CSV
    datafile="D:\research\networking\network_hfr.csv";
    getnames=yes;
    guessingrows=50000;
run;

proc import out=barclay replace dbms=CSV
    datafile="D:\research\networking\network_barclay.csv";
    getnames=yes;
    guessingrows=5000;
run;

/* Stack the four extracts into one table.
   NOTE(review): in a SET concatenation a variable's attributes come from the
   first data set that defines it; confirm the four imports agree on
   character lengths so later files are not truncated. */
data network_combine;
    set tass
        mstar
        hfr
        barclay;
run;
/*** get firm level homo edu number ***/

/* University lookup table; sat is the midpoint of sat25 and sat75. */
proc import out=univ replace dbms=CSV
    datafile="D:\research\networking\univ.csv";
    getnames=yes;
    guessingrows=5000;
run;

data univ;
    set univ;
    sat = (sat25 + sat75) / 2;
run;

/* Education subset: rows with a non-blank education, without the working column. */
data network_combine_edu;
    set network_combine (where=(education ne ""));
    drop working;
run;

/* Work subset: rows with a non-blank working, without the education column. */
data network_combine_work;
    set network_combine (where=(working ne ""));
    drop education;
run;

/* Attach the univ columns to every education row (left join on education). */
proc sql;
    create table network_combine_edu as
    select *
    from network_combine_edu as a
         left join univ as b
         on a.education = b.education;
quit;

/* Keep one row per manager/school/SAT combination within a firm. */
proc sort data=network_combine_edu nodupkeys;
    by firmname first_name last_name education_r sat25 sat75 sat;
run;
/* Partner-side copy of the education rows for the within-firm self-join:
   education_r is renamed to education_r1 and two helper columns are added
   (first_name_match, and education_match1 = upper-cased first 12 characters
   of education_r). */
data network_combine_edu1;
    set network_combine_edu (keep=fundname firmname first_name last_name education_r);
    first_name_match = first_name;
    education_match1 = upcase(substr(education_r, 1, 12));
    rename education_r = education_r1;
run;

/* Own-side match key (same 12-character rule) plus a unit counter for the HHI sums. */
data network_combine_edu;
    set network_combine_edu;
    education_match = upcase(substr(education_r, 1, 12));
    hhi_count       = 1;
run;
/* Rows per (firmname, education_r): totalhhi_count is remerged onto every row,
   then one row per pair is kept and exported. */
proc sort data=network_combine_edu out=network_edu_hhi;
    by firmname education_r;
run;

proc sql;
    create table network_edu_hhi as
    select *,
           sum(hhi_count) as totalhhi_count
    from network_edu_hhi
    group by firmname, education_r;
quit;

proc sort data=network_edu_hhi nodupkeys;
    by firmname education_r;
run;

proc export data=network_edu_hhi
    outfile='d:\research\networking\network_edu_hhi.csv'
    dbms=csv
    replace;
run;

/*** table 1 summary stats ***/

/* One row per (education_r, fundname) pair, each carrying a unit counter. */
proc sort data=network_combine_edu out=edulist nodupkeys;
    by education_r fundname;
run;

data edulist;
    set edulist;
    count1 = 1;
run;

/* totalcount1 = number of distinct fundname values per education_r. */
proc sql;
    create table edulist1 as
    select *,
           sum(count1) as totalcount1
    from edulist
    group by education_r;
quit;

/* Input data sets are named explicitly instead of relying on _LAST_. */
proc sort data=edulist1 nodupkeys;
    by education_r;
run;

proc sort data=edulist1;
    by descending totalcount1;
run;

/* Distinct fund names appearing in the education sample. */
proc sort data=edulist out=funds nodupkeys;
    by fundname;
run;

/* REPLACE added: without it PROC EXPORT refuses to overwrite an existing
   edulist2.csv, so a rerun would silently leave the old file in place. */
proc export data=edulist1
    outfile='d:\research\networking\edulist2.csv'
    dbms=csv
    replace;
run;

/* One row per (education_r, first_name, last_name), each carrying a unit counter. */
proc sort data=network_combine_edu out=edulist3 nodupkeys;
    by education_r first_name last_name;
run;

data edulist3;
    set edulist3;
    count1 = 1;
run;

/* totalcount1 = number of distinct (first_name, last_name) pairs per education_r. */
proc sql;
    create table edulist3 as
    select *,
           sum(count1) as totalcount1
    from edulist3
    group by education_r;
quit;

/* Input data sets are named explicitly instead of relying on _LAST_. */
proc sort data=edulist3 nodupkeys;
    by education_r;
run;

proc sort data=edulist3;
    by descending totalcount1;
run;

/* Distinct manager names.
   NOTE(review): this reads edulist (already de-duplicated by education_r and
   fundname), not network_combine_edu, so it may not list every manager;
   confirm that is intended. */
proc sort data=edulist out=managers nodupkeys;
    by first_name last_name;
run;

/* REPLACE added: without it PROC EXPORT refuses to overwrite an existing
   edulist3.csv, so a rerun would silently leave the old file in place. */
proc export data=edulist3
    outfile='d:\research\networking\edulist3.csv'
    dbms=csv
    replace;
run;

/* Pair every education row with every partner row of the same firm. */
proc sql;
    create table firmname_match_edu as
    select *
    from network_combine_edu, network_combine_edu1
    where network_combine_edu.firmname = network_combine_edu1.firmname;
quit;

/* homo_edu = 1 when the partner has a different first name and the same
   12-character education key; 0 otherwise.
   NOTE(review): only first names are compared, so two different managers who
   share a first name never count as a match; confirm that is intended. */
data firmname_match_edu;
    set firmname_match_edu;
    if first_name_match ne first_name
       and education_match = education_match1
       and education ne ""
    then homo_edu = 1;
    else homo_edu = 0;
run;

/* Keep the matching pairs, then one row per manager within a firm. */
data homo_match_edu;
    set firmname_match_edu;
    where homo_edu = 1;
    count = 1;
run;

proc sort data=homo_match_edu nodupkeys;
    by firmname first_name last_name;
run;

/* sumhomo_edu = number of managers in the firm with at least one match;
   one row per firm is kept. */
proc sql;
    create table homo_match_edu1 as
    select *,
           sum(count) as sumhomo_edu
    from homo_match_edu
    group by firmname;
quit;

proc sort data=homo_match_edu1 nodupkeys;
    by firmname;
run;


/*** get firm level homo work number ***/

/* One row per (working, fundname) pair, each carrying a unit counter. */
proc sort data=network_combine_work out=worklist nodupkeys;
    by working fundname;
run;

data worklist;
    set worklist;
    count1 = 1;
run;

/* totalcount1 = number of distinct fundname values per working value. */
proc sql;
    create table worklist1 as
    select *,
           sum(count1) as totalcount1
    from worklist
    group by working;
quit;

proc sort data=worklist1 nodupkeys;
    by working;
run;

proc sort data=worklist1;
    by descending totalcount1;
run;

proc export data=worklist1
    outfile='d:\research\networking\worklist2.csv'
    dbms=csv
    replace;
run;

/* Dump the education and work subsets as they stand at this point. */
proc export data=network_combine_edu
    outfile='d:\research\networking\network_combine_edu.csv'
    dbms=csv
    replace;
run;

proc export data=network_combine_work
    outfile='d:\research\networking\network_combine_work.csv'
    dbms=csv
    replace;
run;

/* One row per (working, first_name, last_name), each carrying a unit counter. */
proc sort data=network_combine_work out=worklist3 nodupkeys;
    by working first_name last_name;
run;

data worklist3;
    set worklist3;
    count1 = 1;
run;

/* totalcount1 = number of distinct (first_name, last_name) pairs per working value. */
proc sql;
    create table worklist3 as
    select *,
           sum(count1) as totalcount1
    from worklist3
    group by working;
quit;

proc sort data=worklist3 nodupkeys;
    by working;
run;

proc sort data=worklist3;
    by descending totalcount1;
run;

proc export data=worklist3
    outfile='d:\research\networking\worklist3.csv'
    dbms=csv
    replace;
run;

/* One row per manager/employer combination within a firm. */
proc sort data=network_combine_work nodupkeys;
    by firmname first_name last_name working;
run;

/* Partner-side copy for the within-firm self-join: working is renamed to
   working1, and first_name_match, working_match1, working_match and hhi_count
   are added. Both working_match1 and working_match are the upper-cased first
   15 characters of the same row's employer, so they hold the same value on
   every row of this table. */
data network_combine_work1;
    set network_combine_work (keep=fundname firmname first_name last_name working);
    first_name_match = first_name;
    working_match1   = upcase(substr(working, 1, 15));
    working_match    = upcase(substr(working, 1, 15));
    hhi_count        = 1;
    rename working = working1;
run;

/* Rows per (firmname, working_match): totalhhi_count is remerged onto every
   row, then one row per pair is kept and exported. */
proc sort data=network_combine_work1 out=network_work_hhi;
    by firmname working_match;
run;

proc sql;
    create table network_work_hhi as
    select *,
           sum(hhi_count) as totalhhi_count
    from network_work_hhi
    group by firmname, working_match;
quit;

proc sort data=network_work_hhi nodupkeys;
    by firmname working_match;
run;

proc export data=network_work_hhi
    outfile='d:\research\networking\network_work_hhi.csv'
    dbms=csv
    replace;
run;
/* Pair every work row with every partner row of the same firm. */
proc sql;
    create table firmname_match_work as
    select *
    from network_combine_work, network_combine_work1
    where network_combine_work.firmname = network_combine_work1.firmname;
quit;

/* homo_work = 1 when the partner has a different first name and the same
   15-character upper-cased employer key; 0 otherwise.

   Fix: working_match and working_match1 both come from the partner table
   (network_combine_work1), so comparing them was always true and every pair
   of differently named colleagues was flagged. The own-side key is now
   derived from working, which in this joined table comes from
   network_combine_work (the partner's copy is named working1). */
data firmname_match_work;
    set firmname_match_work;
    if first_name_match ne first_name
       and upcase(substr(working, 1, 15)) = working_match1
       and working ne ""
    then homo_work = 1;
    else homo_work = 0;
run;

/* Keep the matching pairs, then one row per manager within a firm. */
data homo_match_work;
    set firmname_match_work;
    where homo_work = 1;
    count = 1;
run;

proc sort data=homo_match_work nodupkeys;
    by firmname first_name last_name;
run;

/* sumhomo_work = number of managers in the firm with at least one match;
   one row per firm is kept. */
proc sql;
    create table homo_match_work1 as
    select *,
           sum(count) as sumhomo_work
    from homo_match_work
    group by firmname;
quit;

proc sort data=homo_match_work1 nodupkeys;
    by firmname;
run;


/*** get the total number of managers of firms, and then compute two ratios ***/
/*** backfill bias ***/
libname testo 'd:\research\testosterone';

/* Working copy of the stored fund performance panel. */
data fund_performance_98_4;
    set testo.fund_performance_98_0319_2015;
run;

/* Rows with year <= 1998, exported on their own. */
data fund_performance_psuedo;
    set fund_performance_98_4;
    where year <= 1998;
run;

proc export data=fund_performance_psuedo
    outfile='d:\research\networking\fund_performance_psuedo.csv'
    dbms=csv
    replace;
run;

/* Distinct primarycategory values, exported for inspection. */
proc sort data=fund_performance_98_4 (keep=primarycategory)
          out=strategy nodupkeys;
    by primarycategory;
run;

proc export data=strategy
    outfile='d:\research\networking\strategy.csv'
    dbms=csv
    replace;
run;
/* Map the vendor-specific primarycategory labels onto broad strategy2 groups.
   Rows whose primarycategory is not listed keep whatever strategy2 they had
   (missing if the column is new).

   Fix: without a LENGTH statement strategy2 took its length (12) from the
   first assigned literal, "Event Driven", so longer group names such as
   "Emerging Markets", "Multi-Strategy", "Market-Neutral", "Relative Value"
   and "Fund of Funds" were truncated. The longest group name is 16
   characters. LENGTH is placed after SET so that, if strategy2 already
   exists in the input, its stored length is left alone. */
data fund_performance_98_4;
    set fund_performance_98_4;
    length strategy2 $16;

    if primarycategory in (
        "Activist", "Distressed Securities", "Merger Arbitrage",
        "Event-Driven", "Event Driven", "HF Event Driven"
    ) then strategy2 = "Event Driven";

    else if primarycategory in (
        "Arbitrage", "Stock Index", "Discretionary", "Global Macro",
        "Systematic", "Macro", "HF Global Macro"
    ) then strategy2 = "Global Macro";

    else if primarycategory in (
        "Fund of Funds", "Fund of Funds - Arbitrage", "Fund of Funds - Diversifi",
        "HF Fund of Funds - Debt", "HF Fund of Funds - Equity",
        "HF Fund of Funds - Event", "HF Fund of Funds - Macro/",
        "HF Fund of Funds - Multis", "HF Fund of Funds - Relati"
    ) then strategy2 = "Fund of Funds";

    /* The first literal is single-quoted so that &amp is not treated as a
       macro variable reference; the text itself is unchanged. */
    else if primarycategory in (
        'Balanced (Stocks &amp; Bo', "Multi-Advisor", "Multi-Strategy",
        "HF Multistrategy"
    ) then strategy2 = "Multi-Strategy";

    /* NOTE(review): most labels here look cut off at 25 characters, but
       "Fundamental - Agricultural" is 26 long; confirm it matches the data. */
    else if primarycategory in (
        "CTA", "Managed Futures", "Fundamental - Agricultural",
        "Fundamental - Currency", "Fundamental - Diversified",
        "Fundamental - Energy", "Fundamental - Financial/M",
        "HF Currency", "HF Systematic Futures"
    ) then strategy2 = "CTA";

    else if primarycategory in (
        "Emerging Markets", "Emerging Markets - Asia", "Emerging Markets - Easter",
        "Emerging Markets - Global", "Emerging Markets - Latin",
        "Emerging Markets - MENA", "Emerging Markets - Other",
        "HF Asia/Pacific Long/Shor", "HF Emerging Markets Long-",
        "HF Emerging Markets Long/", "HF China Long/Short Equit",
        "HF Europe Long/Short Equi"
    ) then strategy2 = "Emerging Markets";

    else if primarycategory in (
        "Equity Long/Short", "Equity Long/Short - Growt", "Equity Long/Short - Oppor",
        "Equity Long/Short - Quant", "Equity Long/Short - Tradi",
        "Equity Long/Short - Value", "Equity 130-30", "Equity Hedge",
        "Equity Long-Bias", "HF Long/Short Debt", "Long/Short Equity Hedge",
        "Closed-end funds", "HF U.S. Long/Short Equity",
        "HF U.S. Small Cap Long/Sh", "HF Global Long/Short Equi"
    ) then strategy2 = "Long/Short";

    else if primarycategory in (
        "Equity Market Neutral", "Equity Market Neutral - Q",
        "Equity Market Neutral - V", "Statistical Arbitrage",
        "HF Equity Market Neutral"
    ) then strategy2 = "Market-Neutral";

    else if primarycategory in (
        "Equity Long Only", "Equity Long Only - Growth", "Equity Long Only - Opport",
        "Equity Long Only - Quanti", "Equity Long Only - Tradin",
        "Equity Long Only - Value", "Mutual Funds/ETFs", "HF Long-Only Debt",
        "HF Long-Only Equity", "HF Long-Only Other", "HF Bear Market Equity"
    ) then strategy2 = "Long Only";

    else if primarycategory in (
        "Convertible Arbitrage", "Convertible Arbitrage - C",
        "Convertible Arbitrage - V", "Fixed Income - ABS/Sec. L",
        "Fixed Income - Arbitrage", "Fixed Income - Asset-Back",
        "Fixed Income - Convertibl", "Fixed Income - Diversifie",
        "Fixed Income - High Yield", "Fixed Income - Insurance-",
        "Fixed Income - Long-Only", "Fixed Income - Long/Short",
        "Fixed Income - Mortgage B", "Fixed Income Arbitrage",
        "Option Strategies", "Options Strategy", "Volatility Trading",
        "Tail Risk", "HF Convertible Arbitrage", "HF Debt Arbitrage",
        "HF Distressed Securities", "HF Diversified Arbitrage",
        "HF Merger Arbitrage", "Relative Value", "HF Volatility"
    ) then strategy2 = "Relative Value";

    else if primarycategory in (
        "Sector - Energy", "Sector - Environment", "Sector - Farming",
        "Sector - Financial", "Sector - Health Care/Biot",
        "Sector - Metals/Mining", "Sector - Miscellaneous",
        "Sector - Natural Resource", "Sector - Real Estate",
        "Sector - Technology", "Technical - Agricultural",
        "Technical - Currency", "Technical - Diversified",
        "Technical - Energy", "Technical - Financial/Met",
        "Technical - Interest Rate"
    ) then strategy2 = "Sector";

    else if primarycategory in (
        "Equity Short-Bias", "Dedicated Short Bias", "Equity Dedicated Short"
    ) then strategy2 = "Short Bias";
run;

/* One row per (companyname, manager_name), each carrying a unit counter. */
proc sort data=fund_performance_98_4 out=firm_manager nodupkeys;
    by companyname manager_name;
run;

data firm_manager;
    set firm_manager;
    manager_count = 1;
run;

/*** count the managers of each firm (the >=3 subsample is taken below) ***/
proc sql;
    create table firm_manager as
    select *,
           sum(manager_count) as totalmanager_count
    from firm_manager
    group by companyname;
quit;

proc sort data=firm_manager out=firm_manager nodupkeys;
    by companyname;
run;

data firm_manager;
    set firm_manager (keep=companyname totalmanager_count);
run;

/* Attach totalmanager_count to every performance row. */
proc sql;
    create table fund_performance_98_4 as
    select *
    from fund_performance_98_4 as a
         left join firm_manager as b
         on a.companyname = b.companyname;
quit;

/* Subsample of firms with at least three managers, with its distinct funds
   and distinct manager names. */
data fund_performance_3;
    set fund_performance_98_4;
    where totalmanager_count >= 3;
run;

proc sort data=fund_performance_3 out=funds nodupkeys;
    by fund_id;
run;

proc sort data=fund_performance_3 out=managers nodupkeys;
    by manager_name;
run;

/* Rows with duplicate=0 and class="TASS". */
data fund_performance;
    set fund_performance_98_4;
    where duplicate = 0 & class = "TASS";
run;

/*** get time-series firm AUM ***/

/* One row per fund / manager / month. */
proc sort data=fund_performance_98_4 nodupkeys;
    by fund_id manager_name year month;
run;

/* Distinct funds. */
proc sort data=fund_performance_98_4 out=funds nodupkeys;
    by fund_id;
run;

proc sort data=fund_performance_98_4;
    by companyname year month;
run;

/* firmsize = sum of size over all rows of the same firm and month,
   remerged onto every row. */
proc sql;
    create table fund_performance_98_4 as
    select *,
           sum(size) as firmsize
    from fund_performance_98_4
    group by companyname, year, month;
quit;

proc sort data=fund_performance_98_4;
    by fund_id manager_name year month;
run;

/*** compute time-series of manager count at firm level ***/

/* first / last = first and last word of manager_name. */
data fund_performance_98_4;
    set fund_performance_98_4;
    first = scan(manager_name, 1);
    last  = scan(manager_name, -1);
run;

/* One row per manager within a firm, with the start/end work years of the
   first row found for that manager. */
proc sort data=network_combine (keep=firmname first_name last_name
                                     startyear_work endyear_work)
          out=manager_time nodupkeys;
    by firmname first_name last_name;
run;

/*** backfill bias adjustment new ***/
/*** tass ***/

/* Two comma-delimited files holding the TASS add dates. */
proc import out=dateadd_tass1 replace dbms=dlm
    datafile="d:\research\testosterone\dateadd_tass1.txt";
    getnames=yes;
    delimiter=',';
run;

proc import out=dateadd_tass2 replace dbms=dlm
    datafile="d:\research\testosterone\dateadd_tass2.txt";
    getnames=yes;
    delimiter=',';
run;

data dateadd_tass;
    set dateadd_tass1
        dateadd_tass2;
run;

/* One row per productreference. */
proc sort data=dateadd_tass nodupkeys;
    by productreference;
run;

/* Date part of dateaddedtotass, split into year and month. */
data dateadd_tass;
    set dateadd_tass;
    dateadd  = datepart(dateaddedtotass);
    addyear  = year(dateadd);
    addmonth = month(dateadd);
run;
/*** HFR ***/

/* Fund detail sheets for dead and live funds. */
proc import out=deadfund_detail replace dbms=xls
    datafile="d:\research\testosterone\hfr\dead_funds\excel_fund.xls";
    getnames=yes;
    guessingrows=50000;
run;

proc import out=livefund_detail replace dbms=xls
    datafile="d:\research\testosterone\hfr\live_funds\excel_fund.xls";
    getnames=yes;
    guessingrows=50000;
run;

data hfrdetails;
    set deadfund_detail
        livefund_detail;
run;

/* Keep the fund id and its add date, split into year and month. */
data dateadd_hfr;
    set hfrdetails (keep=fund_id Date_Added_to_DB);
    addyear  = year(Date_Added_to_DB);
    addmonth = month(Date_Added_to_DB);
run;

/*** Mstar ***/
/*** use the new method ***/

/* Wide return file: one row per (name, secid), one column per month. */
proc import out=hf_ret replace dbms=CSV
    datafile="d:\research\testosterone\mstar\hf_ret.csv";
    getnames=yes;
    guessingrows=5000;
run;

proc sort data=hf_ret;
    by name secid;
run;

/* Wide -> long: one row per (name, secid, month column). */
proc transpose data=hf_ret out=hf_ret;
    by name secid;
run;

/* Drop empty cells; year and month are cut out of the original column name
   (characters 16-19 and 21-22 of _name_). */
data hf_ret;
    set hf_ret (where=(col1 ne .));
    year  = substr(_name_, 16, 4);
    month = substr(_name_, 21, 2);
    rename col1 = return;
run;

/* Numeric year1 (1994-2019 only) and month1 (01-12 only); anything else stays
   missing. return is divided by 100. */
data hf_ret;
    set hf_ret;
    if year in ('1994','1995','1996','1997','1998','1999','2000','2001','2002',
                '2003','2004','2005','2006','2007','2008','2009','2010','2011',
                '2012','2013','2014','2015','2016','2017','2018','2019')
        then year1 = input(year, 4.);
    if month in ('01','02','03','04','05','06','07','08','09','10','11','12')
        then month1 = input(month, 2.);
    return = return / 100;
run;

/* id = running number of the secid in sort order; id1 = block of 20
   consecutive ids; temp = unit counter. */
proc sort data=hf_ret;
    by secid;
run;

data hf_ret;
    set hf_ret;
    by secid;
    retain id 0;
    if first.secid then id + 1;
    id1  = ceil(id / 20);
    temp = 1;
run;

proc sort data=hf_ret;
    by id1 year1 month1 id;
run;

/*** within each group, get the month with most returns ***/
proc sql;
    create table dateadd_mstar as
    select *,
           sum(temp) as ret_count
    from hf_ret
    group by id1, year1, month1;
quit;

/* One row per (id1, month), then the month with the highest ret_count first,
   then only that first row per id1. */
proc sort data=dateadd_mstar nodupkeys;
    by id1 year1 month1;
run;

proc sort data=dateadd_mstar;
    by id1 descending ret_count;
run;

proc sort data=dateadd_mstar nodupkeys;
    by id1;
run;

/* secid -> id1 lookup. */
proc sort data=hf_ret out=fund nodupkeys;
    by secid;
run;

data dateadd_mstar;
    set dateadd_mstar;
    keep id1 year1 month1;
    rename year1 = addyear
           month1 = addmonth;
run;

data fund;
    set fund (keep=secid id1);
run;

/* Give every secid the add year/month of its id1 block. */
proc sql;
    create table dateadd_mstar as
    select *
    from fund as a
         left join dateadd_mstar as b
         on a.id1 = b.id1;
quit;

/*** barclay hedge ***/

/* "Performance" sheets of the live and dead workbooks. */
proc import out=barclayperformance_live replace dbms=xlsx
    datafile="d:\research\testosterone\barclay\barclayhedge_live.xlsx";
    sheet="Performance";
    getnames=yes;
run;

proc import out=barclayperformance_dead replace dbms=xlsx
    datafile="d:\research\testosterone\barclay\barclayhedge_dead.xlsx";
    sheet="Performance";
    getnames=yes;
run;

data barclayperformance;
    set barclayperformance_live
        barclayperformance_dead;
run;

proc sort data=barclayperformance;
    by fund_name fund_id fund_type manager_id;
run;

/* Wide -> long: one row per fund and month column. */
proc transpose data=barclayperformance out=barclayperformance;
    by fund_name fund_id fund_type manager_id;
run;

/* Drop empty cells and fund_name; month and year are cut out of the column
   label (characters 1-3 and 5-8 of _label_). */
data barclayperformance;
    set barclayperformance (where=(col1 ne .));
    year  = substr(_label_, 5, 4);
    month = substr(_label_, 1, 3);
    rename col1 = return;
    drop fund_name;
run;

/* Numeric year1 (1994-2016 only) and month1 (Jan-Dec); anything else stays
   missing. return is divided by 100 and the text helper columns are dropped. */
data barclayperformance;
    set barclayperformance;
    if year in ('1994','1995','1996','1997','1998','1999','2000','2001',
                '2002','2003','2004','2005','2006','2007','2008','2009',
                '2010','2011','2012','2013','2014','2015','2016')
        then year1 = input(year, 4.);
    select (month);
        when ('Jan') month1 = 1;
        when ('Feb') month1 = 2;
        when ('Mar') month1 = 3;
        when ('Apr') month1 = 4;
        when ('May') month1 = 5;
        when ('Jun') month1 = 6;
        when ('Jul') month1 = 7;
        when ('Aug') month1 = 8;
        when ('Sep') month1 = 9;
        when ('Oct') month1 = 10;
        when ('Nov') month1 = 11;
        when ('Dec') month1 = 12;
        otherwise;
    end;
    return = return / 100;
    drop _name_ _label_ year month;
run;

/* id = running number of the fund_id in sort order; id1 = block of 20
   consecutive ids; temp = unit counter. */
proc sort data=barclayperformance out=hf_ret;
    by fund_id;
run;

data hf_ret;
    set hf_ret;
    by fund_id;
    retain id 0;
    if first.fund_id then id + 1;
    id1  = ceil(id / 20);
    temp = 1;
run;

proc sort data=hf_ret;
    by id1 year1 month1 id;
run;

/*** within each group, get the month with most returns ***/
proc sql;
    create table dateadd_barclay as
    select *,
           sum(temp) as ret_count
    from hf_ret
    group by id1, year1, month1;
quit;

/* One row per (id1, month), then the month with the highest ret_count first,
   then only that first row per id1. */
proc sort data=dateadd_barclay nodupkeys;
    by id1 year1 month1;
run;

proc sort data=dateadd_barclay;
    by id1 descending ret_count;
run;

proc sort data=dateadd_barclay nodupkeys;
    by id1;
run;

/* fund_id -> id1 lookup. */
proc sort data=hf_ret out=fund nodupkeys;
    by fund_id;
run;

data dateadd_barclay;
    set dateadd_barclay;
    keep id1 year1 month1;
    rename year1 = addyear
           month1 = addmonth;
run;

data fund;
    set fund (keep=fund_id id1);
run;

/* Give every fund_id the add year/month of its id1 block. */
proc sql;
    create table dateadd_barclay as
    select *
    from fund as a
         left join dateadd_barclay as b
         on a.id1 = b.id1;
quit;

/*** combine four adddate files to the baseline file ***/

/* Give each source's add year/month a source-specific name. */
data dateadd_tass;
    set dateadd_tass (rename=(addyear=addyear_tass addmonth=addmonth_tass));
run;

data dateadd_mstar;
    set dateadd_mstar (rename=(addyear=addyear_mstar addmonth=addmonth_mstar));
run;

data dateadd_hfr;
    set dateadd_hfr (rename=(addyear=addyear_hfr addmonth=addmonth_hfr));
run;

data dateadd_barclay;
    set dateadd_barclay (rename=(addyear=addyear_barclay addmonth=addmonth_barclay));
run;

/* Left-join the four add-date tables onto the performance panel, each on
   that source's own fund id column. */
proc sql;
    create table fund_performance_98_4 as
    select *
    from fund_performance_98_4 as a
         left join dateadd_tass as b
         on a.fund_id_tass = b.productreference;

    create table fund_performance_98_4 as
    select *
    from fund_performance_98_4 as a
         left join dateadd_mstar as b
         on a.fund_id_mstar = b.secid;

    create table fund_performance_98_4 as
    select *
    from fund_performance_98_4 as a
         left join dateadd_hfr as b
         on a.fund_id_hfr = b.fund_id;

    create table fund_performance_98_4 as
    select *
    from fund_performance_98_4 as a
         left join dateadd_barclay as b
         on a.fund_id_barclay = b.fund_id;
quit;

/* Remove observations dated before the fund's earliest add date across the
   four sources.

   Fix: the earliest year and the earliest month used to be taken
   independently (min of the four years, min of the four months), which can
   produce a date that belongs to none of the sources, e.g. 2005-03 and
   2003-11 gave 2003-03 instead of 2003-11. Each source's date is now
   combined into yyyymm first and the minimum of those is split back into
   addyear / addmonth.

   Rows with no add date in any source keep missing addyear/addmonth and are
   not deleted, as before. */
data fund_performance_98_4;
    set fund_performance_98_4;

    add_yyyymm = min(addyear_tass    * 100 + addmonth_tass,
                     addyear_mstar   * 100 + addmonth_mstar,
                     addyear_hfr     * 100 + addmonth_hfr,
                     addyear_barclay * 100 + addmonth_barclay);

    if missing(add_yyyymm) then call missing(addyear, addmonth);
    else do;
        addyear  = floor(add_yyyymm / 100);
        addmonth = mod(add_yyyymm, 100);
    end;

    if not missing(add_yyyymm) and year * 100 + month < add_yyyymm then delete;

    drop add_yyyymm;
run;


	/*
	   %winsor: winsorize or trim variables at given percentiles.

	   dsetin=   input data set
	   dsetout=  output data set; when blank, dsetin is overwritten
	   byvar=    BY variable(s) within which the percentiles are computed;
	             "none" (default) treats the whole data set as one group
	   vars=     space-separated list of variables to process
	   type=     winsor -> values beyond a bound are set to that bound;
	             any other value -> observations beyond a bound are deleted
	   pctl=     lower and upper percentile points given to PROC UNIVARIATE

	   Missing values are left untouched. The work data sets xtemp and
	   xtemp_pctl are created, and the output is sorted by the BY variable(s).

	   Change: the working macro variables are now declared %local so the
	   macro cannot overwrite same-named variables in the calling scope.
	*/
	%macro winsor(dsetin=, dsetout=, byvar=none, vars=, type=winsor, pctl=1 99);
	  
	%local varL varH xn token;
	  
	%if &dsetout = %then %let dsetout = &dsetin;
	     
	%let varL=;
	%let varH=;
	%let xn=1;
	  
	/* Build the <var>L / <var>H name lists and count the variables. */
	%do %until ( %scan(&vars,&xn)= );
	    %let token = %scan(&vars,&xn);
	    %let varL = &varL &token.L;
	    %let varH = &varH &token.H;
	    %let xn=%EVAL(&xn + 1);
	%end;
	  
	%let xn=%eval(&xn-1);
	  
	data xtemp;
	    set &dsetin;
	    run;
	  
	/* No BY variable requested: use a constant so the same code path works. */
	%if &byvar = none %then %do;
	  
	    data xtemp;
	        set xtemp;
	        xbyvar = 1;
	        run;
	  
	    %let byvar = xbyvar;
	  
	%end;
	  
	proc sort data = xtemp;
	    by &byvar;
	    run;
	  
	/* Percentile bounds per BY group, named <var>L and <var>H. */
	proc univariate data = xtemp noprint;
	    by &byvar;
	    var &vars;
	    output out = xtemp_pctl PCTLPTS = &pctl PCTLPRE = &vars PCTLNAME = L H;
	    run;
	  
	data &dsetout;
	    merge xtemp xtemp_pctl;
	    by &byvar;
	    array trimvars{&xn} &vars;
	    array trimvarl{&xn} &varL;
	    array trimvarh{&xn} &varH;
	  
	    do xi = 1 to dim(trimvars);
	  
	        %if &type = winsor %then %do;
	            if not missing(trimvars{xi}) then do;
	              if (trimvars{xi} < trimvarl{xi}) then trimvars{xi} = trimvarl{xi};
	              if (trimvars{xi} > trimvarh{xi}) then trimvars{xi} = trimvarh{xi};
	            end;
	        %end;
	  
	        %else %do;
	            if not missing(trimvars{xi}) then do;
	              if (trimvars{xi} < trimvarl{xi}) then delete;
	              if (trimvars{xi} > trimvarh{xi}) then delete;
	            end;
	        %end;
	  
	    end;
	    drop &varL &varH xbyvar xi;
	    run;
	  
	%mend winsor;

/*** clean of collected data ***/

/* first / last = first and last word of manager_name.
   return_pre adds managementfee/100 and incentivefee/100 * return to return.
   NOTE(review): the fee columns are used as percentages with no adjustment
   for reporting frequency; confirm that matches how they are stored. */
data hf_performance;
    set fund_performance_98_4;
    first      = scan(manager_name, 1);
    last       = scan(manager_name, -1);
    return_pre = return + managementfee/100 + incentivefee/100*return;
run;

proc export data=hf_performance
    outfile='d:\research\networking\hf_performance_full_backfill.csv'
    dbms=csv
    replace;
run;


/*** get the firm-total manager count ***/

/* One row per firm found in the network sample. */
proc sort data=network_combine out=firm_manager nodupkeys;
    by firmname;
run;

/* Keep only the firm name.
   Fix: the output data set was misspelled "firm_namager", so firm_manager
   itself kept every network_combine column (including one manager's
   first_name, last_name and work years per firm). Those columns then took
   precedence over the same-named manager_time columns in the later
   SELECT * joins. */
data firm_manager;
    set firm_manager;
    keep firmname;
run;

/*** our searched sample has 2793 firms ***/
/*** get total manager count of these 2793 firms ***/

/* Performance rows whose companyname equals a firmname in the firm list. */
proc sql;
    create table firm_manager as
    select *
    from hf_performance, firm_manager
    where hf_performance.companyname = firm_manager.firmname;
quit;

/* Only managers with both a first and a last name. */
data manager_time;
    set manager_time;
    where first_name ne "" and last_name ne "";
run;

/* Attach the manager_time row matching firm, first name and last name. */
proc sql;
    create table firm_manager as
    select *
    from firm_manager as a
         left join manager_time as b
         on  a.firmname = b.firmname
         and a.first    = b.first_name
         and a.last     = b.last_name;
quit;

/* Flag firm-years outside a manager's known employment window.
   drop = 1 when the row matched a manager_time record, both work years are
   known, and year is before startyear_work or after endyear_work;
   drop = 0 otherwise.

   Fix: the second test compared year with startyear_work again
   (year > startyear_work), so every year other than the start year was
   discarded. It now compares with endyear_work. */
data firm_manager;
    set firm_manager;
    if first = first_name and last = last_name
       and endyear_work ne . and startyear_work ne .
       and (year < startyear_work or year > endyear_work)
    then drop = 1;
    else drop = 0;
run;

/* Keep only the rows inside the window (or with no usable window). */
data firm_manager;
    set firm_manager;
    where drop = 0;
run;

/* One row per firm / year / manager, each carrying a unit counter. */
proc sort data=firm_manager out=firm_manager nodupkeys;
    by firmname year manager_name;
run;

data firm_manager;
    set firm_manager;
    manager_count = 1;
run;

/* totalmanager = number of those rows per companyname and year. */
proc sql;
    create table firm_manager as
    select *,
           sum(manager_count) as totalmanager
    from firm_manager
    group by companyname, year;
quit;

/* One row per firm-year with just the count. */
proc sort data=firm_manager nodupkeys;
    by firmname year;
run;

data firm_manager;
    set firm_manager (keep=firmname year totalmanager);
run;

/*** 1292 firms of the sample***/

/* Attach the firm-level education and work match counts to each firm-year. */
proc sql;
    create table firm_manager1 as
    select *
    from firm_manager as a
         left join homo_match_edu1 as b
         on a.firmname = b.firmname;

    create table firm_manager1 as
    select *
    from firm_manager1 as a
         left join homo_match_work1 as b
         on a.firmname = b.firmname;
quit;

/* Firms without a match count 0. Each ratio is the count over totalmanager,
   capped at 1. */
data firm_manager1;
    set firm_manager1;
    if sumhomo_edu  = . then sumhomo_edu  = 0;
    if sumhomo_work = . then sumhomo_work = 0;
    homo_edu_ratio  = min(1, sumhomo_edu  / totalmanager);
    homo_work_ratio = min(1, sumhomo_work / totalmanager);
run;

proc means data=firm_manager1 N mean std p25 median p75;
    var homo_edu_ratio homo_work_ratio;
run;

/* Bring the firm-year ratios onto the performance panel and keep only the
   rows that found a firm-year. */
proc sql;
    create table hf_performance_homo as
    select *
    from hf_performance as a
         left join firm_manager1 as b
         on  a.companyname = b.firmname
         and a.year        = b.year;
quit;

data hf_performance_homo;
    set hf_performance_homo;
    where firmname ne "";
run;


/*** combine with seven factors and run rolling window regressions ***/
proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
getnames=yes;
guessingrows=500;
run;
data hf_performance_homo;
set hf_performance_homo;
date=year*100+month;
run;

/* Pair each fund-month with the factor row of the same year and month. */
proc sql;
  create table hf_performance_homo_7factor as
  select *
  from hf_performance_homo as perf
       inner join sevenfactors as fac
         on perf.year = fac.year
        and perf.month = fac.month;
quit;

/* One row per fund-month. */
proc sort data=hf_performance_homo_7factor nodupkey;
  by fund_id year month;
run;

/* totcount = number of monthly rows each fund has; the helper column count
   stays in the table because the table is exported afterwards. */
data hf_performance_homo_7factor;
  set hf_performance_homo_7factor;
  count = 1;
run;

proc sql;
  create table hf_performance_homo_7factor as
  select *, sum(count) as totcount
  from hf_performance_homo_7factor
  group by fund_id;
quit;

/* Write the fund-month/factor panel to CSV, then read three rolling-alpha
   CSV files. NOTE(review): the rolling_alpha*.csv files are not written by
   this section - presumably an external rolling-regression step produces
   them from the export; confirm. */
proc export data=hf_performance_homo_7factor
            outfile='d:\research\networking\hf_performance_homo_7factor.csv'
            dbms=csv replace;
run;

proc import out=rolling_alpha
            datafile="d:\research\networking\rolling_alpha.csv"
            dbms=csv replace;
  getnames=yes;
  guessingrows=50000;
run;

proc import out=rolling_alpha_pre
            datafile="d:\research\networking\rolling_alpha_pre.csv"
            dbms=csv replace;
  getnames=yes;
  guessingrows=50000;
run;

proc import out=rolling_alpha_capm
            datafile="d:\research\networking\rolling_alpha_capm.csv"
            dbms=csv replace;
  getnames=yes;
  guessingrows=50000;
run;

/* Order each fund's rows by yyyymm so that lag1() below returns the previous
   row of the same fund. */
proc sort data=rolling_alpha;
by fund_id yyyymm;
run;

/* estimatedassets_lag1 = size on the previous row of the same fund.
   count is the 1-based row position inside the fund: it is reset on
   first.fund_id and incremented by the sum statement, which also retains it.
   The loop blanks array elements count..dim(x); the array has one element,
   so the lag is set to missing only on a fund's first row and a value from
   the previous fund never carries over.
   NOTE(review): if the imported rolling_alpha already has a column named
   count, SET would overwrite the counter on every row and the lag would be
   blanked on every row - confirm the CSV has no such column. */
data  rolling_alpha(drop=i count);
set  rolling_alpha;
by fund_id;
array x(*) estimatedassets_lag1;
estimatedassets_lag1=lag1(size);
if first.fund_id then count=1;
do i=count to dim(x);
x(i)=.;
end;
count + 1;
run;

/* flow = change in size not explained by the period return, scaled by the
   lagged size. It is missing wherever the lagged size is missing. */
data  rolling_alpha;
set  rolling_alpha;
flow=(size-estimatedassets_lag1*(1+return))/estimatedassets_lag1;
run;
/* %winsor is defined outside this section; presumably it winsorizes flow at
   the 0.2 and 99.8 percentiles over the whole table (byvar=none) - confirm
   against the macro definition. */
%winsor(dsetin=rolling_alpha, byvar=none, vars=flow, type=winsor, pctl=0.2 99.8);run;

/* Attach the three rolling-alpha tables to the panel by fund and yyyymm.
   All three joins key on the panel's own fund_id and date, and select *
   keeps the first copy of any repeated column name, so one chained join
   yields the same rows and columns as three successive ones. */
proc sql;
  create table hf_performance_homo as
  select *
  from hf_performance_homo as perf
       left join rolling_alpha as ra
         on perf.fund_id = ra.fund_id and perf.date = ra.yyyymm
       left join rolling_alpha_pre as rp
         on perf.fund_id = rp.fund_id and perf.date = rp.yyyymm
       left join rolling_alpha_capm as rc
         on perf.fund_id = rc.fund_id and perf.date = rc.yyyymm;
quit;

/* Drop Fund of Funds rows and compute, per fund-month:
     alpha      - return less the seven loading*factor terms (_b_* loadings)
     alpha_pre  - return_pre less the same terms with the _b_*1 loadings
     alpha_capm - return less _b_mktrf2*mktrf
     skill      - return_pre less the _b_* terms, times estimatedassets_lag1
   NOTE(review): skill pairs return_pre with the _b_* loadings used for
   alpha rather than the _b_*1 loadings used for alpha_pre - confirm that
   this is intended. */
data hf_performance_homo;
  set hf_performance_homo(where=(strategy not="Fund of Funds"));

  alpha = return
          - _b_ptfsbd*ptfsbd - _b_ptfsfx*ptfsfx - _b_ptfscom*ptfscom
          - _b_mktrf*mktrf - _b_smb*smb
          - _b_tenyrcmtchange*tenyrcmtchange
          - _b_baacreditchange*baacreditchange;

  alpha_pre = return_pre
              - _b_ptfsbd1*ptfsbd - _b_ptfsfx1*ptfsfx - _b_ptfscom1*ptfscom
              - _b_mktrf1*mktrf - _b_smb1*smb
              - _b_tenyrcmtchange1*tenyrcmtchange
              - _b_baacreditchange1*baacreditchange;

  alpha_capm = return - _b_mktrf2*mktrf;

  skill = (return_pre
           - _b_ptfsbd*ptfsbd - _b_ptfsfx*ptfsfx - _b_ptfscom*ptfscom
           - _b_mktrf*mktrf - _b_smb*smb
           - _b_tenyrcmtchange*tenyrcmtchange
           - _b_baacreditchange*baacreditchange) * estimatedassets_lag1;
run;
/* Sort the panel by fund and manager, then build hf_performance_homo1:
   every panel row plus the fund-manager level means of return, flow and
   alpha and the standard deviations of return and alpha. */
proc sort data=hf_performance_homo;
  by fund_id manager_name;
run;

proc sql;
  create table hf_performance_homo1 as
  select *,
         mean(return) as meanreturn,
         mean(flow)   as meanflow,
         mean(alpha)  as meanalpha,
         std(return)  as totrisk,
         std(alpha)   as idiorisk
  from hf_performance_homo
  group by fund_id, manager_name;
quit;

/* Within each date, place both homophily ratios into 100 rank groups
   (0-99), rescale the group numbers by 1/100, and finally keep one row per
   fund-date. */
proc sort data=hf_performance_homo;
  by date;
run;

/* Each VAR is ranked independently, so one call gives the same two rank
   columns as two successive calls. */
proc rank data=hf_performance_homo out=hf_performance_homo groups=100;
  by date;
  var homo_edu_ratio homo_work_ratio;
  ranks homoedu_rank100 homowork_rank100;
run;

data hf_performance_homo;
  set hf_performance_homo;
  homoedu_rank100 = homoedu_rank100 / 100;
  homowork_rank100 = homowork_rank100 / 100;
run;

proc sort data=hf_performance_homo nodupkey;
  by fund_id date;
run;
/* Number each fund's rows 1, 2, ... (seq), group them into consecutive
   blocks of 24 rows (seq1), and compute the fund's age in months.
   NOTE(review): inceptiondate is split as if it were a numeric yyyymm
   value - confirm its format. */
data hf_performance_meanhomo;
  set hf_performance_homo;
  by fund_id;
  retain seq;
  if first.fund_id then seq = 1;
  else seq + 1;
  seq1 = ceil(seq / 24);
  incepyear = int(inceptiondate / 100);
  incepmonth = inceptiondate - int(inceptiondate / 100) * 100;
  age_hf = (year*12 + month) - (incepyear*12 + incepmonth);
run;
/*** compute risk and mean performance for every 24 months ***/

/* Gross return and a combined fund/window key. */
data hf_performance_meanhomo;
  set hf_performance_meanhomo;
  ret_1 = 1 + return;
  fund_id_seq = fund_id || seq1;
run;

/* cum is the running product of gross returns restarted at the first row of
   every fund/window; cum_loss is that product less 1.
   NOTE(review): BY fund_id_seq requires the concatenated key to be in
   ascending order in the incoming data - confirm, or sort on it first. */
data hf_performance_meanhomo;
  set hf_performance_meanhomo;
  by fund_id_seq;
  retain cum;
  if first.fund_id_seq then cum = ret_1;
  else cum = cum * ret_1;
  cum_loss = cum - 1;
run;
/* Flag negative-return months: ret_neg carries the return and count_neg is 1
   when the return is below zero; both stay missing otherwise.
   SAS orders a numeric missing value below every number, so a bare
   return<0 is also true for a missing return. The explicit missing() guard
   keeps months with no return from being counted as negative months in the
   window totals computed from count_neg. */
data hf_performance_meanhomo;
set hf_performance_meanhomo;
if not missing(return) and return<0 then ret_neg=return;else ret_neg=.;
if not missing(return) and return<0 then count_neg=1;
run;
/* Statistics per fund and 24-row window, remerged onto every row:
   means of return/flow/alpha, standard deviations of return and alpha, the
   lowest return, the lowest cum_loss, the sum of squared negative returns,
   the number of negative months, mppm, and the largest _eq2_r2.
   NOTE(review): mppm is coded as -6*log(mean((1+return)*sqrt(1+rf))); check
   this expression against the intended performance-measure formula. */
proc sql;
  create table hf_performance_meanhomo as
  select *,
         mean(return)          as meanreturn,
         mean(flow)            as meanflow,
         mean(alpha)           as meanalpha,
         std(return)           as totrisk,
         std(alpha)            as idiorisk,
         min(return)           as maxloss,
         min(cum_loss)         as maxdrawdown,
         sum(ret_neg*ret_neg)  as sumretneg,
         sum(count_neg)        as negtotcount,
         -6*log(mean((1+return)*sqrt(1+rf))) as mppm,
         max(_eq2_r2)          as maxrsq
  from hf_performance_meanhomo
  group by fund_id, seq1;
quit;

/* neg_dev: root mean square of the negative returns in the window;
   neg_percent: negative months over a fixed 24; ret2: return rounded to
   0.001.
   NOTE(review): windows with fewer than 24 rows are still divided by 24. */
data hf_performance_meanhomo;
  set hf_performance_meanhomo;
  neg_dev = sqrt(sumretneg / negtotcount);
  neg_percent = negtotcount / 24;
  ret2 = round(return, 0.001);
run;

/* Find fund/windows in which the same rounded return (ret2) occurs more
   than once, and record that occurrence count (countret) on every row of
   the affected fund/window. */
proc sql;
  create table hf_performance_meanhomo_dup as
  select *, count(ret2) as Countret
  from hf_performance_meanhomo
  group by fund_id, seq1, ret2
  having countret > 1;
quit;

/* One row per fund/window key, keeping only the key and the count. */
proc sort data=hf_performance_meanhomo_dup(keep=fund_id_seq countret)
          out=hf_performance_meanhomo_dup nodupkey;
  by fund_id_seq;
run;

proc sql;
  create table hf_performance_meanhomo as
  select *
  from hf_performance_meanhomo as main
       left join hf_performance_meanhomo_dup as dup
         on main.fund_id_seq = dup.fund_id_seq;
quit;

/* Order the panel by fund and date and take a slim copy holding only the
   columns from which lags and leads are built. */
proc sort data=hf_performance_meanhomo;
  by fund_id date;
run;

data hf_performance_meanhomo1(keep=fund_id date return alpha flow alpha_capm);
  set hf_performance_meanhomo;
run;

/* shift_within_fund: add <var>_<tag>1 ... <var>_<tag>n to
   hf_performance_meanhomo1, where element k is the value of var k rows
   earlier in the current sort order.
   count is the 1-based row position inside the fund; on every row the loop
   blanks elements count..n, so a shifted value is kept only when it comes
   from an earlier row of the same fund. */
%macro shift_within_fund(var=, n=, tag=lag);
  %local k;
  data hf_performance_meanhomo1(drop=i count);
    set hf_performance_meanhomo1;
    by fund_id;
    array x(*) &var._&tag.1-&var._&tag.&n;
    %do k=1 %to &n;
      &var._&tag.&k = lag&k(&var);
    %end;
    if first.fund_id then count=1;
    do i=count to dim(x);
      x(i)=.;
    end;
    count + 1;
  run;
%mend shift_within_fund;

/* The table is sorted by fund_id and ascending date at this point, so the
   shifted values are lags. One data step per variable, in this order. */
%shift_within_fund(var=return,     n=48)
%shift_within_fund(var=alpha,      n=48)
%shift_within_fund(var=alpha_capm, n=24)
%shift_within_fund(var=flow,       n=36)

/* Trailing averages over the available (non-missing) lags. The 1324
   variables cover lags 13 to 24 only. */
data hf_performance_meanhomo1;
  set hf_performance_meanhomo1;
  lag12return     = mean(of return_lag1-return_lag12);
  lag12alpha      = mean(of alpha_lag1-alpha_lag12);
  lag24return     = mean(of return_lag1-return_lag24);
  lag36return     = mean(of return_lag1-return_lag36);
  lag48return     = mean(of return_lag1-return_lag48);
  lag24alpha      = mean(of alpha_lag1-alpha_lag24);
  lag36alpha      = mean(of alpha_lag1-alpha_lag36);
  lag48alpha      = mean(of alpha_lag1-alpha_lag48);
  lag24alpha_capm = mean(of alpha_capm_lag1-alpha_capm_lag24);
  lag1324return   = mean(of return_lag13-return_lag24);
  lag1324alpha    = mean(of alpha_lag13-alpha_lag24);
  lag12flow       = mean(of flow_lag1-flow_lag12);
  lag24flow       = mean(of flow_lag1-flow_lag24);
  lag36flow       = mean(of flow_lag1-flow_lag36);
  lag1324flow     = mean(of flow_lag13-flow_lag24);
run;

/* Reverse the date order inside each fund: a lag taken in this order is a
   lead in calendar time. */
proc sort data=hf_performance_meanhomo1;
  by fund_id descending date;
run;

/* shift_within_fund: add <var>_<tag>1 ... <var>_<tag>n to
   hf_performance_meanhomo1, where element k is the value of var k rows
   earlier in the current sort order.
   count is the 1-based row position inside the fund; on every row the loop
   blanks elements count..n, so a shifted value is kept only when it comes
   from an earlier row of the same fund. */
%macro shift_within_fund(var=, n=, tag=lag);
  %local k;
  data hf_performance_meanhomo1(drop=i count);
    set hf_performance_meanhomo1;
    by fund_id;
    array x(*) &var._&tag.1-&var._&tag.&n;
    %do k=1 %to &n;
      &var._&tag.&k = lag&k(&var);
    %end;
    if first.fund_id then count=1;
    do i=count to dim(x);
      x(i)=.;
    end;
    count + 1;
  run;
%mend shift_within_fund;

/* One data step per variable, in this order. */
%shift_within_fund(var=return, n=48, tag=lead)
%shift_within_fund(var=alpha,  n=48, tag=lead)
%shift_within_fund(var=flow,   n=12, tag=lead)

/* Forward averages over the available (non-missing) leads. */
data hf_performance_meanhomo1;
  set hf_performance_meanhomo1;
  lead12return = mean(of return_lead1-return_lead12);
  lead24return = mean(of return_lead1-return_lead24);
  lead36return = mean(of return_lead1-return_lead36);
  lead48return = mean(of return_lead1-return_lead48);
  lead12alpha  = mean(of alpha_lead1-alpha_lead12);
  lead24alpha  = mean(of alpha_lead1-alpha_lead24);
  lead36alpha  = mean(of alpha_lead1-alpha_lead36);
  lead48alpha  = mean(of alpha_lead1-alpha_lead48);
  lead12flow   = mean(of flow_lead1-flow_lead12);
run;

/* Restore ascending date order and merge the lag/lead columns back onto the
   full panel by fund and date. */
proc sort data=hf_performance_meanhomo1;
  by fund_id date;
run;

proc sql;
  create table hf_performance_meanhomo as
  select *
  from hf_performance_meanhomo as main
       left join hf_performance_meanhomo1 as shifted
         on main.fund_id = shifted.fund_id
        and main.date = shifted.date;
quit;

/* Monthly panel. */
proc export data=hf_performance_meanhomo
            outfile='d:\research\networking\hf_performance_portfolio.csv'
            dbms=csv replace;
run;

/* One row per fund and 24-row window.
   NOTE(review): which month's row survives for a window depends on the row
   order left by the join above - confirm that only window-level columns are
   used from this file. */
proc sort data=hf_performance_meanhomo nodupkey;
  by fund_id seq1;
run;

proc export data=hf_performance_meanhomo
            outfile='d:\research\networking\hf_performance_meanhomo.csv'
            dbms=csv replace;
run;

/*** CALENDAR TIME PORTFOLIO based on fWHr ***/
/*** step 1 form portfolio every Jan 1 ***/
/* Reload the monthly panel, keep firms with at least two managers, and
   print descriptive statistics: size over all rows, then the education
   diversity ratio (1 - homo_edu_ratio) over one row per fund, overall and
   by strategy. */
proc import out=hf_performance_portfolio
            datafile="d:\research\networking\hf_performance_portfolio.csv"
            dbms=csv replace;
  getnames=yes;
  guessingrows=50000;
run;

data hf_performance_portfolio;
  set hf_performance_portfolio(where=(totalmanager>=2));
run;

proc means data=hf_performance_portfolio mean p25 median p75 std;
  var size;
run;

/* funds: the first row met for each fund_id. */
proc sort data=hf_performance_portfolio out=funds nodupkey;
  by fund_id;
run;

data funds;
  set funds;
  diver_edu_ratio = 1 - homo_edu_ratio;
run;

proc means data=funds n mean p25 median p75 std;
  var diver_edu_ratio;
run;

proc means data=funds n mean p25 median p75 std;
  class strategy;
  var diver_edu_ratio;
run;

/* Order the panel by year and month, then split it at 2004.
   Each split is reduced to one row per fund in the scratch table funds;
   the second reduction overwrites the first.
   NOTE(review): funds is not read again in this section - presumably these
   sorts exist only for the row counts in the log. */
proc sort data=hf_performance_portfolio;
  by year month;
run;

data hf_performance_portfolio_early;
  set hf_performance_portfolio(where=(year<=2004));
run;

proc sort data=hf_performance_portfolio_early out=funds nodupkey;
  by fund_id;
run;

data hf_performance_portfolio_late;
  set hf_performance_portfolio(where=(year>2004));
run;

proc sort data=hf_performance_portfolio_late out=funds nodupkey;
  by fund_id;
run;


/* Remove columns that are rebuilt below. Listing a column that is absent
   only raises a warning for an output-side drop list. */
data hf_performance_portfolio(drop=homoedu_rank meanreturn meanalpha meanflow);
  set hf_performance_portfolio;
run;

/* Yearly terciles (0, 1, 2) of homo_edu_ratio, formed on January rows that
   have a 12-month lead return and a ratio strictly different from 0 and 1. */
proc rank data=hf_performance_portfolio out=hf_performance_portfolio1 groups=3;
  where month=1 and return_lead12 ne . and homo_edu_ratio ne 1 and homo_edu_ratio ne 0;
  by year;
  var homo_edu_ratio;
  ranks homoedu_rank;
run;

/* Map the tercile ranks onto the interior portfolios 2-4.
   PROC RANK numbers the groups 0, 1, 2 in ascending order of
   homo_edu_ratio. The extreme portfolios are assigned later in this file as
   1 for ratio=1 and 5 for ratio=0, so portfolio numbers fall as the ratio
   rises. The interior groups must follow the same direction: the highest
   tercile becomes 2 and the lowest becomes 4. This is the mapping used for
   the work-based sort (homowork_rank1=4-homowork_rank); the former
   2+homoedu_rank ran the interior portfolios the opposite way. */
data hf_performance_portfolio1;
set hf_performance_portfolio1;
homoedu_rank1=4-homoedu_rank;
keep fund_id year homoedu_rank1;
run;
/* Give every fund-month the portfolio its fund was assigned for that year. */
proc sql;
  create table hf_performance_portfolio as
  select *
  from hf_performance_portfolio as panel
       left join hf_performance_portfolio1 as grp
         on panel.fund_id = grp.fund_id
        and panel.year = grp.year;
quit;

/* Ratio 1 and ratio 0 form the extreme portfolios 1 and 5. Rows left
   without a portfolio are removed after those assignments, and countyr=1
   is added for the fund counts that follow. */
data hf_performance_portfolio;
  set hf_performance_portfolio;
  if homo_edu_ratio = 1 then homoedu_rank1 = 1;
  if homo_edu_ratio = 0 then homoedu_rank1 = 5;
  if homoedu_rank1 ne .;
  countyr = 1;
run;

/* For each education portfolio 1-5: build sort<g> with one row per year
   holding totcountyr, the number of distinct funds in the portfolio that
   year, and print summary statistics of that yearly count. */
%macro edu_group_sizes;
  %local g;
  %do g=1 %to 5;
    data sort&g;
      set hf_performance_portfolio;
      where homoedu_rank1=&g;
    run;

    /* one row per fund-year, so the yearly sum of countyr counts funds */
    proc sort data=sort&g nodupkeys;
      by year fund_id;
    run;

    proc sql;
      create table sort&g as
      select *, sum(countyr) as totcountyr
      from sort&g
      group by year;
    quit;

    proc sort data=sort&g nodupkeys;
      by year;
    run;

    proc means data=sort&g;
      var totcountyr;
    run;
  %end;
%mend edu_group_sizes;
%edu_group_sizes


/* Equal-weighted mean return and mean alpha per portfolio and month,
   remerged onto every row; then one row per portfolio-month is kept.
   NOTE(review): the fund-level columns on the surviving row (fund_id,
   strategy, homo_edu_ratio, ...) are those of whichever fund comes first. */
proc sql;
  create table hf_performance_portfolio as
  select *,
         mean(return) as meanreturn,
         mean(alpha)  as meanalpha
  from hf_performance_portfolio
  group by homoedu_rank1, year, month;
quit;

proc sort data=hf_performance_portfolio nodupkey;
  by homoedu_rank1 year month;
run;

/* Working copy without the factor columns, which are re-attached later. */
data hf_performance_portfolio10(drop=rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom);
  set hf_performance_portfolio;
run;
proc import out=sevenfactors
            datafile="d:\research\testosterone\7factors.csv"
            dbms=csv replace;
  getnames=yes;
  guessingrows=500;
run;

/* regime_factors: build sevenfactors<id> holding yyyymm and the seven
   factors renamed with the suffix <id>; the factor values are set to 0 on
   every row where zero_when is true. The rename applies on output, so the
   step itself works with the original names. */
%macro regime_factors(id=, zero_when=);
  data sevenfactors&id;
    set sevenfactors;
    array fac(*) mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if (&zero_when) then do _k=1 to dim(fac);
      fac(_k)=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf&id smb=smb&id tenyrcmtchange=tenyrcmtchange&id
           baacreditchange=baacreditchange&id ptfsbd=ptfsbd&id
           ptfsfx=ptfsfx&id ptfscom=ptfscom&id;
  run;
%mend regime_factors;

/* Set 1 is non-zero up to 200004, set 2 from 200005 to 200809,
   set 3 after 200809. */
%regime_factors(id=1, zero_when=yyyymm>200004)
%regime_factors(id=2, zero_when=yyyymm<=200004 or yyyymm>200809)
%regime_factors(id=3, zero_when=yyyymm<=200809)

/* Read the anomaly file and rebuild the numeric yyyymm key on the
   portfolio table. */
proc import out=anomaly11
            datafile="d:\research\networking\anomaly11.csv"
            dbms=csv replace;
  getnames=yes;
  guessingrows=500;
run;

data hf_performance_portfolio10;
  set hf_performance_portfolio10;
  date = 100*year + month;
run;
/* Attach, by yyyymm, the full factor table, the three regime factor tables
   and the anomaly table to the portfolio-month series. select * keeps the
   first copy of any repeated column name.
   PROC SQL does not promise any row order without ORDER BY, while the
   procedures that follow use BY homoedu_rank1. The final statement
   therefore orders the result explicitly instead of relying on the join
   happening to preserve the earlier sort. */
proc sql;
  create table hf_performance_portfolio10 as
  select *
  from hf_performance_portfolio10 as p
       inner join sevenfactors as f
         on p.date = f.yyyymm;

  create table hf_performance_portfolio10 as
  select *
  from hf_performance_portfolio10 as p
       inner join sevenfactors1 as f
         on p.date = f.yyyymm;

  create table hf_performance_portfolio10 as
  select *
  from hf_performance_portfolio10 as p
       inner join sevenfactors2 as f
         on p.date = f.yyyymm;

  create table hf_performance_portfolio10 as
  select *
  from hf_performance_portfolio10 as p
       inner join sevenfactors3 as f
         on p.date = f.yyyymm;

  create table hf_performance_portfolio10 as
  select *
  from hf_performance_portfolio10 as p
       inner join anomaly11 as a
         on p.date = a.yyyymm
  order by p.homoedu_rank1, p.year, p.month;
quit;
/* Excess return of each portfolio-month; Fund of Funds rows are excluded. */
data hf_performance_portfolio10;
  set hf_performance_portfolio10(where=(strategy not="Fund of Funds"));
  excessreturn = meanreturn - rf;
run;

/* Per-portfolio summaries and one-sample t-tests (BY homoedu_rank1 needs
   the table ordered by that column). */
proc means data=hf_performance_portfolio10;
  by homoedu_rank1;
  var excessreturn;
run;
proc means data=hf_performance_portfolio10;
  by homoedu_rank1;
  var homo_edu_ratio;
run;
proc ttest data=hf_performance_portfolio10;
  by homoedu_rank1;
  var excessreturn;
run;

/* Portfolio 1 against portfolio 5, mean alpha. */
proc ttest data=hf_performance_portfolio10;
  where homoedu_rank1=1 or homoedu_rank1=5;
  class homoedu_rank1;
  var meanalpha;
run;

/* Sub-period summaries: up to 2004 and after 2004. */
proc means data=hf_performance_portfolio10;
  where year<=2004;
  by homoedu_rank1;
  var excessreturn;
run;
proc means data=hf_performance_portfolio10;
  where year>2004;
  by homoedu_rank1;
  var excessreturn;
run;

/* One-sample t-tests per portfolio: full sample, early, late. */
proc ttest data=hf_performance_portfolio10;
  by homoedu_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio10;
  where year<=2004;
  by homoedu_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio10;
  where year>2004;
  by homoedu_rank1;
  var excessreturn;
run;

/* Portfolio 1 against portfolio 5, excess return: full sample, then the
   early and the late sub-sample. */
proc ttest data=hf_performance_portfolio10;
  where homoedu_rank1=1 or homoedu_rank1=5;
  class homoedu_rank1;
  var excessreturn;
run;

data hf_performance_portfolio10_early;
  set hf_performance_portfolio10(where=(year<=2004));
run;
proc ttest data=hf_performance_portfolio10_early;
  where homoedu_rank1=1 or homoedu_rank1=5;
  class homoedu_rank1;
  var excessreturn;
run;

data hf_performance_portfolio10_late;
  set hf_performance_portfolio10(where=(year>2004));
run;
proc ttest data=hf_performance_portfolio10_late;
  where homoedu_rank1=1 or homoedu_rank1=5;
  class homoedu_rank1;
  var excessreturn;
run;


/* Order the education portfolio series by portfolio and write it to CSV. */
proc sort data=hf_performance_portfolio10;
  by homoedu_rank1;
run;

proc export data=hf_performance_portfolio10
            outfile='d:\research\networking\hf_performance_portfolio10_edu.csv'
            dbms=csv replace;
run;

/* Spread series: excess return of portfolio 1 less that of portfolio 5,
   matched on date. All columns of the portfolio-1 rows are kept; from
   portfolio 5 only the excess return (renamed), date and fund_id are taken. */
data hf_performance_portfolio_1;
  set hf_performance_portfolio10(where=(homoedu_rank1=1));
  rename excessreturn=excessreturn1;
run;

data hf_performance_portfolio_5(keep=excessreturn date fund_id
                                rename=(excessreturn=excessreturn5));
  set hf_performance_portfolio10(where=(homoedu_rank1=5));
run;

proc sql;
  create table hf_performance_portfolio_s as
  select *
  from hf_performance_portfolio_1 as top
       inner join hf_performance_portfolio_5 as bottom
         on top.date = bottom.date;
quit;

data hf_performance_portfolio_s;
  set hf_performance_portfolio_s;
  excessreturn_s = excessreturn1 - excessreturn5;
run;

proc export data=hf_performance_portfolio_s
            outfile='d:\research\networking\hf_performance_portfolio_s_edu.csv'
            dbms=csv replace;
run;

/* Start over from the monthly panel for the work-based sort: keep firms
   with at least two managers, print the work diversity ratio
   (1 - homo_work_ratio) over one row per fund, overall and by strategy,
   then order the panel by year and month and remove the columns rebuilt
   below. */
proc import out=hf_performance_portfolio
            datafile="d:\research\networking\hf_performance_portfolio.csv"
            dbms=csv replace;
  getnames=yes;
  guessingrows=50000;
run;

data hf_performance_portfolio;
  set hf_performance_portfolio(where=(totalmanager>=2));
run;

/* funds: the first row met for each fund_id. */
proc sort data=hf_performance_portfolio out=funds nodupkey;
  by fund_id;
run;

data funds;
  set funds;
  diver_work_ratio = 1 - homo_work_ratio;
run;

proc means data=funds n mean p25 median p75 std;
  var diver_work_ratio;
run;

proc means data=funds n mean p25 median p75 std;
  class strategy;
  var diver_work_ratio;
run;

proc sort data=hf_performance_portfolio;
  by year month;
run;

data hf_performance_portfolio(drop=meanreturn meanalpha meanflow);
  set hf_performance_portfolio;
run;
/* Yearly terciles (0, 1, 2) of homo_work_ratio, formed on January rows that
   have a 12-month lead return and a ratio strictly different from 0 and 1. */
proc rank data=hf_performance_portfolio out=hf_performance_portfolio1 groups=3;
  where month=1 and return_lead12 ne . and homo_work_ratio ne 1 and homo_work_ratio ne 0;
  by year;
  var homo_work_ratio;
  ranks homowork_rank;
run;

/* Interior portfolios: highest tercile becomes 2, lowest becomes 4. */
data hf_performance_portfolio1(keep=fund_id year homowork_rank1);
  set hf_performance_portfolio1;
  homowork_rank1 = 4 - homowork_rank;
run;

/* Give every fund-month the portfolio its fund was assigned for that year. */
proc sql;
  create table hf_performance_portfolio as
  select *
  from hf_performance_portfolio as panel
       left join hf_performance_portfolio1 as grp
         on panel.fund_id = grp.fund_id
        and panel.year = grp.year;
quit;

/* Ratio 1 and ratio 0 form the extreme portfolios 1 and 5. Rows left
   without a portfolio are removed after those assignments, and countyr=1
   is added for the fund counts that follow. */
data hf_performance_portfolio;
  set hf_performance_portfolio;
  if homo_work_ratio = 1 then homowork_rank1 = 1;
  if homo_work_ratio = 0 then homowork_rank1 = 5;
  if homowork_rank1 ne .;
  countyr = 1;
run;
/* For each work portfolio 1-5: build sort<g> with one row per year holding
   totcountyr, the number of distinct funds in the portfolio that year, and
   print summary statistics of that yearly count. */
%macro work_group_sizes;
  %local g;
  %do g=1 %to 5;
    data sort&g;
      set hf_performance_portfolio;
      where homowork_rank1=&g;
    run;

    /* one row per fund-year, so the yearly sum of countyr counts funds */
    proc sort data=sort&g nodupkeys;
      by year fund_id;
    run;

    proc sql;
      create table sort&g as
      select *, sum(countyr) as totcountyr
      from sort&g
      group by year;
    quit;

    proc sort data=sort&g nodupkeys;
      by year;
    run;

    proc means data=sort&g;
      var totcountyr;
    run;
  %end;
%mend work_group_sizes;
%work_group_sizes

/* Equal-weighted mean return and mean alpha per portfolio and month,
   remerged onto every row; then one row per portfolio-month is kept.
   NOTE(review): the fund-level columns on the surviving row are those of
   whichever fund comes first. */
proc sql;
  create table hf_performance_portfolio as
  select *,
         mean(return) as meanreturn,
         mean(alpha)  as meanalpha
  from hf_performance_portfolio
  group by homowork_rank1, year, month;
quit;

proc sort data=hf_performance_portfolio nodupkey;
  by homowork_rank1 year month;
run;

/* Working copy without the factor columns, which are re-attached later. */
data hf_performance_portfolio10(drop=rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom);
  set hf_performance_portfolio;
run;
proc import out=sevenfactors
            datafile="d:\research\testosterone\7factors.csv"
            dbms=csv replace;
  getnames=yes;
  guessingrows=500;
run;

/* regime_factors: build sevenfactors<id> holding yyyymm and the seven
   factors renamed with the suffix <id>; the factor values are set to 0 on
   every row where zero_when is true. The rename applies on output, so the
   step itself works with the original names. */
%macro regime_factors(id=, zero_when=);
  data sevenfactors&id;
    set sevenfactors;
    array fac(*) mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if (&zero_when) then do _k=1 to dim(fac);
      fac(_k)=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf&id smb=smb&id tenyrcmtchange=tenyrcmtchange&id
           baacreditchange=baacreditchange&id ptfsbd=ptfsbd&id
           ptfsfx=ptfsfx&id ptfscom=ptfscom&id;
  run;
%mend regime_factors;

/* Set 1 is non-zero up to 200004, set 2 from 200005 to 200809,
   set 3 after 200809. */
%regime_factors(id=1, zero_when=yyyymm>200004)
%regime_factors(id=2, zero_when=yyyymm<=200004 or yyyymm>200809)
%regime_factors(id=3, zero_when=yyyymm<=200809)
/* Read the anomaly file and rebuild the numeric yyyymm key on the
   portfolio table. */
proc import out=anomaly11
            datafile="d:\research\networking\anomaly11.csv"
            dbms=csv replace;
  getnames=yes;
  guessingrows=500;
run;

data hf_performance_portfolio10;
  set hf_performance_portfolio10;
  date = 100*year + month;
run;
/* Attach, by yyyymm, the full factor table, the three regime factor tables
   and the anomaly table to the portfolio-month series. select * keeps the
   first copy of any repeated column name.
   PROC SQL does not promise any row order without ORDER BY, while the
   procedures that follow use BY homowork_rank1. The final statement
   therefore orders the result explicitly instead of relying on the join
   happening to preserve the earlier sort. */
proc sql;
  create table hf_performance_portfolio10 as
  select *
  from hf_performance_portfolio10 as p
       inner join sevenfactors as f
         on p.date = f.yyyymm;

  create table hf_performance_portfolio10 as
  select *
  from hf_performance_portfolio10 as p
       inner join sevenfactors1 as f
         on p.date = f.yyyymm;

  create table hf_performance_portfolio10 as
  select *
  from hf_performance_portfolio10 as p
       inner join sevenfactors2 as f
         on p.date = f.yyyymm;

  create table hf_performance_portfolio10 as
  select *
  from hf_performance_portfolio10 as p
       inner join sevenfactors3 as f
         on p.date = f.yyyymm;

  create table hf_performance_portfolio10 as
  select *
  from hf_performance_portfolio10 as p
       inner join anomaly11 as a
         on p.date = a.yyyymm
  order by p.homowork_rank1, p.year, p.month;
quit;
/* Drop the "Fund of Funds" strategy rows and compute the return in excess of rf. */
data hf_performance_portfolio10;
  set hf_performance_portfolio10;
  where strategy not="Fund of Funds";
  excessreturn=meanreturn-rf;
run;

/* The procedures that follow use BY homowork_rank1, which requires the data to be ordered by
   that variable. The table was just rebuilt by PROC SQL joins, whose output order is not
   guaranteed, so sort it here before any BY-group processing. */
proc sort data=hf_performance_portfolio10;
  by homowork_rank1;
run;
/* Descriptive statistics and t-tests on hf_performance_portfolio10, per homowork_rank1 group
   and for the rank 1 vs rank 5 comparison. The input data set is named explicitly on every
   procedure. */
proc means data=hf_performance_portfolio10;
  by homowork_rank1;
  var excessreturn;
run;

proc ttest data=hf_performance_portfolio10;
  by homowork_rank1;
  var excessreturn;
run;

/* Two-sample test of meanalpha between ranks 1 and 5. */
proc ttest data=hf_performance_portfolio10;
  where homowork_rank1 in (1,5);
  class homowork_rank1;
  var meanalpha;
run;

/* Same summary split at year 2004. */
proc means data=hf_performance_portfolio10;
  where year<=2004;
  by homowork_rank1;
  var excessreturn;
run;

proc means data=hf_performance_portfolio10;
  where year>2004;
  by homowork_rank1;
  var excessreturn;
run;

proc ttest data=hf_performance_portfolio10;
  by homowork_rank1;
  var excessreturn;
run;

proc ttest data=hf_performance_portfolio10;
  where year<=2004;
  by homowork_rank1;
  var excessreturn;
run;

proc ttest data=hf_performance_portfolio10;
  where year>2004;
  by homowork_rank1;
  var excessreturn;
run;

/* Two-sample test of excessreturn between ranks 1 and 5. */
proc ttest data=hf_performance_portfolio10;
  where homowork_rank1 in (1,5);
  class homowork_rank1;
  var excessreturn;
run;
/* Sub-sample with year <= 2004: per-rank tests and the rank 1 vs rank 5 comparison. */
data hf_performance_portfolio10_early;
  set hf_performance_portfolio10(where=(year<=2004));
run;

proc ttest data=hf_performance_portfolio10_early;
  by homowork_rank1;
  var excessreturn;
run;

proc ttest data=hf_performance_portfolio10_early;
  where homowork_rank1 in (1,5);
  class homowork_rank1;
  var excessreturn;
run;

/* Sub-sample with year > 2004: the same two tests. */
data hf_performance_portfolio10_late;
  set hf_performance_portfolio10(where=(year>2004));
run;

proc ttest data=hf_performance_portfolio10_late;
  by homowork_rank1;
  var excessreturn;
run;

proc ttest data=hf_performance_portfolio10_late;
  where homowork_rank1 in (1,5);
  class homowork_rank1;
  var excessreturn;
run;

/* Order the portfolio table by rank and write it out. */
proc sort data=hf_performance_portfolio10;
  by homowork_rank1;
run;

proc export outfile='d:\research\networking\hf_performance_portfolio10_work.csv'
            data=hf_performance_portfolio10
            dbms=csv replace;
run;

/* Rank-1 rows, all columns, with excessreturn renamed to excessreturn1. */
data hf_performance_portfolio_1(rename=(excessreturn=excessreturn1));
  set hf_performance_portfolio10(where=(homowork_rank1=1));
run;

/* Rank-5 rows reduced to date and excessreturn (renamed to excessreturn5). */
data hf_performance_portfolio_5(keep=excessreturn date rename=(excessreturn=excessreturn5));
  set hf_performance_portfolio10(where=(homowork_rank1=5));
run;

/* Pair the two ranks on date and take the rank 1 minus rank 5 spread. */
proc sql;
  create table hf_performance_portfolio_s as
    select *
    from hf_performance_portfolio_1 as p1 inner join hf_performance_portfolio_5 as p5
      on p1.date=p5.date;
quit;

data hf_performance_portfolio_s;
  set hf_performance_portfolio_s;
  excessreturn_s=excessreturn1-excessreturn5;
run;

proc export outfile='d:\research\networking\hf_performance_portfolio_s_work.csv'
            data=hf_performance_portfolio_s
            dbms=csv replace;
run;


/*** get nationalities of all names ***/
/* Pull the non-duplicate fund records and reduce them, in place, to one row per
   non-blank manager_name. */
libname testo 'd:\research\testosterone';

data fund_performance_98_4;
  set testo.fund_performance_98_0319_2015(where=(duplicate=0));
run;

proc sort data=fund_performance_98_4 nodupkeys;
  where manager_name ne "";
  by manager_name;
run;
/* Split every manager name into its first and last blank-delimited token. */
data manager_name;
  set fund_performance_98_4;
  first_name=scan(manager_name,1);
  last_name=scan(manager_name,-1);
  keep manager_name first_name last_name;
run;

/* Build the distinct first-name list BEFORE manager_name is collapsed to one row per
   last name. Taking it afterwards would lose the first names of every manager who shares
   a surname with the row that survives the de-duplication. */
proc sort data=manager_name(keep=first_name where=(first_name not='')) out=first_name nodupkeys;
  by first_name;
run;

/* manager_name itself is still reduced to one row per last_name, as before. */
proc sort data=manager_name nodupkeys;
  by last_name;
run;
proc export data=manager_name
  outfile='d:\research\networking\manager_name.csv'
  dbms=csv
  replace;
run;

proc export data=first_name
  outfile='d:\research\networking\first_name.csv'
  dbms=csv
  replace;
run;

/* Distinct non-blank last names. */
data last_name;
  set manager_name;
  keep last_name;
  where last_name not='';
run;
proc sort data=last_name nodupkeys;
  by last_name;
run;
proc export data=last_name
  outfile='d:\research\networking\last_name.csv'
  dbms=csv
  replace;
run;

/*** homophily based on origins ***/
/* Read the first-name and last-name country lookups. */
proc import out=firstname_nation datafile="d:\research\networking\firstname_nation.csv" dbms=CSV replace;
  guessingrows=500;
  getnames=yes;
run;
proc import out=lastname_nation datafile="d:\research\networking\lastname_nation.csv" dbms=CSV replace;
  guessingrows=500;
  getnames=yes;
run;

/* Discard lookup rows whose country is the literal "unknown". */
data firstname_nation;
  set firstname_nation(where=(country ne "unknown"));
run;
data lastname_nation;
  set lastname_nation(where=(country ne "unknown"));
run;

/* Start again from the full fund performance file. */
libname testo 'd:\research\testosterone';
data fund_performance_98_4;
  set testo.fund_performance_98_0319_2015;
run;
/* Collapse the vendor primarycategory labels into the broad strategy2 buckets.
   Rows whose primarycategory matches none of the labels keep whatever strategy2 they had. */
data fund_performance_98_4;
  set fund_performance_98_4;
  /* Without an explicit length, the first assignment ("Event Driven", 12 characters) fixes the
     width of a newly created strategy2 and silently truncates longer buckets such as
     "Emerging Markets" or "Multi-Strategy". Declared after SET so that, if strategy2 already
     exists on the input, its stored length is left untouched (SAS then only logs a warning). */
  length strategy2 $16;
  select (primarycategory);
    when ("Activist", "Distressed Securities", "Merger Arbitrage", "Event-Driven",
          "Event Driven", "HF Event Driven")
      strategy2="Event Driven";

    when ("Arbitrage", "Stock Index", "Discretionary", "Global Macro", "Systematic",
          "Macro", "HF Global Macro")
      strategy2="Global Macro";

    when ("Fund of Funds", "Fund of Funds - Arbitrage", "Fund of Funds - Diversifi",
          "HF Fund of Funds - Debt", "HF Fund of Funds - Equity", "HF Fund of Funds - Event",
          "HF Fund of Funds - Macro/", "HF Fund of Funds - Multis", "HF Fund of Funds - Relati")
      strategy2="Fund of Funds";

    /* The first label is single-quoted so the ampersand is not scanned as a macro reference. */
    when ('Balanced (Stocks &amp; Bo', "Multi-Advisor", "Multi-Strategy", "HF Multistrategy")
      strategy2="Multi-Strategy";

    /* NOTE(review): every other long label in this step is cut at 25 characters, which suggests
       primarycategory is stored with length 25; the 25-character spelling of
       "Fundamental - Agricultural" is accepted as well so such rows are not missed - confirm. */
    when ("CTA", "Managed Futures", "Fundamental - Agricultural", "Fundamental - Agricultura",
          "Fundamental - Currency", "Fundamental - Diversified", "Fundamental - Energy",
          "Fundamental - Financial/M", "HF Currency", "HF Systematic Futures")
      strategy2="CTA";

    when ("Emerging Markets", "Emerging Markets - Asia", "Emerging Markets - Easter",
          "Emerging Markets - Global", "Emerging Markets - Latin", "Emerging Markets - MENA",
          "Emerging Markets - Other", "HF Asia/Pacific Long/Shor", "HF Emerging Markets Long-",
          "HF Emerging Markets Long/", "HF China Long/Short Equit", "HF Europe Long/Short Equi")
      strategy2="Emerging Markets";

    when ("Equity Long/Short", "Equity Long/Short - Growt", "Equity Long/Short - Oppor",
          "Equity Long/Short - Quant", "Equity Long/Short - Tradi", "Equity Long/Short - Value",
          "Equity 130-30", "Equity Hedge", "Equity Long-Bias", "HF Long/Short Debt",
          "Long/Short Equity Hedge", "Closed-end funds", "HF U.S. Long/Short Equity",
          "HF U.S. Small Cap Long/Sh", "HF Global Long/Short Equi")
      strategy2="Long/Short";

    when ("Equity Market Neutral", "Equity Market Neutral - Q", "Equity Market Neutral - V",
          "Statistical Arbitrage", "HF Equity Market Neutral")
      strategy2="Market-Neutral";

    when ("Equity Long Only", "Equity Long Only - Growth", "Equity Long Only - Opport",
          "Equity Long Only - Quanti", "Equity Long Only - Tradin", "Equity Long Only - Value",
          "Mutual Funds/ETFs", "HF Long-Only Debt", "HF Long-Only Equity", "HF Long-Only Other",
          "HF Bear Market Equity")
      strategy2="Long Only";

    when ("Convertible Arbitrage", "Convertible Arbitrage - C", "Convertible Arbitrage - V",
          "Fixed Income - ABS/Sec. L", "Fixed Income - Arbitrage", "Fixed Income - Asset-Back",
          "Fixed Income - Convertibl", "Fixed Income - Diversifie", "Fixed Income - High Yield",
          "Fixed Income - Insurance-", "Fixed Income - Long-Only", "Fixed Income - Long/Short",
          "Fixed Income - Mortgage B", "Fixed Income Arbitrage", "Option Strategies",
          "Options Strategy", "Volatility Trading", "Tail Risk", "HF Convertible Arbitrage",
          "HF Debt Arbitrage", "HF Distressed Securities", "HF Diversified Arbitrage",
          "HF Merger Arbitrage", "Relative Value", "HF Volatility")
      strategy2="Relative Value";

    when ("Sector - Energy", "Sector - Environment", "Sector - Farming", "Sector - Financial",
          "Sector - Health Care/Biot", "Sector - Metals/Mining", "Sector - Miscellaneous",
          "Sector - Natural Resource", "Sector - Real Estate", "Sector - Technology",
          "Technical - Agricultural", "Technical - Currency", "Technical - Diversified",
          "Technical - Energy", "Technical - Financial/Met", "Technical - Interest Rate")
      strategy2="Sector";

    when ("Equity Short-Bias", "Dedicated Short Bias", "Equity Dedicated Short")
      strategy2="Short Bias";

    /* Unlisted categories: leave strategy2 as it is. */
    otherwise;
  end;
run;
/* Suffix the lookup columns so the first-name and last-name results can sit side by side. */
data firstname_nation;
  set firstname_nation(rename=(country=country_first countryProbability=countryProb_first));
run;
data lastname_nation;
  set lastname_nation(rename=(country=country_last countryProbability=countryProb_last));
run;

/* Keep non-duplicate fund rows whose manager_name yields both a first and a last token. */
data fund_performance_98_4;
  set fund_performance_98_4(where=(duplicate=0));
  first_name=scan(manager_name,1);
  last_name=scan(manager_name,-1);
  if first_name='' or last_name='' then delete;
run;

/* Attach the first-name lookup, then the last-name lookup; funds without a match are kept. */
proc sql;
  create table fund_performance_98_4 as
    select *
    from fund_performance_98_4 as fund
    left join firstname_nation as fn
      on fund.first_name=fn.first_name;
quit;

proc sql;
  create table fund_performance_98_4 as
    select *
    from fund_performance_98_4 as fund
    left join lastname_nation as ln
      on fund.last_name=ln.last_name;
quit;

/* Manager-level origin table for rows with a company name. country is taken from the
   first-name lookup when its probability is strictly larger, otherwise from the last-name
   lookup.
   NOTE(review): country gets its length from country_first, the first assignment - confirm
   that country_last values are not longer. */
data homoorigin;
  set fund_performance_98_4(where=(companyname ne ""));
  if countryprob_first>countryprob_last then country=country_first;
  else country=country_last;
  keep companyname first_name last_name country_first country_last
       countryprob_first countryprob_last fund_id country;
run;

/* One row per company / first name / last name, saved to the network library. */
data homoorigin1(keep=companyname first_name last_name country);
  set homoorigin;
run;
proc sort data=homoorigin1 nodupkeys;
  by companyname first_name last_name;
run;

libname network 'd:\research\networking';
data network.homoorigin2;
  set homoorigin1;
run;


/*** homophily based on gender ***/
/* Re-read the first-name lookup (its columns are used un-renamed in this section)
   and start again from the full fund performance file. */
proc import out=firstname_nation datafile="d:\research\networking\firstname_nation.csv" dbms=CSV replace;
  guessingrows=500;
  getnames=yes;
run;

libname testo 'd:\research\testosterone';
data fund_performance_98_4;
  set testo.fund_performance_98_0319_2015;
run;
/* Collapse the vendor primarycategory labels into the broad strategy2 buckets.
   Rows whose primarycategory matches none of the labels keep whatever strategy2 they had. */
data fund_performance_98_4;
  set fund_performance_98_4;
  /* Without an explicit length, the first assignment ("Event Driven", 12 characters) fixes the
     width of a newly created strategy2 and silently truncates longer buckets such as
     "Emerging Markets" or "Multi-Strategy". Declared after SET so that, if strategy2 already
     exists on the input, its stored length is left untouched (SAS then only logs a warning). */
  length strategy2 $16;
  select (primarycategory);
    when ("Activist", "Distressed Securities", "Merger Arbitrage", "Event-Driven",
          "Event Driven", "HF Event Driven")
      strategy2="Event Driven";

    when ("Arbitrage", "Stock Index", "Discretionary", "Global Macro", "Systematic",
          "Macro", "HF Global Macro")
      strategy2="Global Macro";

    when ("Fund of Funds", "Fund of Funds - Arbitrage", "Fund of Funds - Diversifi",
          "HF Fund of Funds - Debt", "HF Fund of Funds - Equity", "HF Fund of Funds - Event",
          "HF Fund of Funds - Macro/", "HF Fund of Funds - Multis", "HF Fund of Funds - Relati")
      strategy2="Fund of Funds";

    /* The first label is single-quoted so the ampersand is not scanned as a macro reference. */
    when ('Balanced (Stocks &amp; Bo', "Multi-Advisor", "Multi-Strategy", "HF Multistrategy")
      strategy2="Multi-Strategy";

    /* NOTE(review): every other long label in this step is cut at 25 characters, which suggests
       primarycategory is stored with length 25; the 25-character spelling of
       "Fundamental - Agricultural" is accepted as well so such rows are not missed - confirm. */
    when ("CTA", "Managed Futures", "Fundamental - Agricultural", "Fundamental - Agricultura",
          "Fundamental - Currency", "Fundamental - Diversified", "Fundamental - Energy",
          "Fundamental - Financial/M", "HF Currency", "HF Systematic Futures")
      strategy2="CTA";

    when ("Emerging Markets", "Emerging Markets - Asia", "Emerging Markets - Easter",
          "Emerging Markets - Global", "Emerging Markets - Latin", "Emerging Markets - MENA",
          "Emerging Markets - Other", "HF Asia/Pacific Long/Shor", "HF Emerging Markets Long-",
          "HF Emerging Markets Long/", "HF China Long/Short Equit", "HF Europe Long/Short Equi")
      strategy2="Emerging Markets";

    when ("Equity Long/Short", "Equity Long/Short - Growt", "Equity Long/Short - Oppor",
          "Equity Long/Short - Quant", "Equity Long/Short - Tradi", "Equity Long/Short - Value",
          "Equity 130-30", "Equity Hedge", "Equity Long-Bias", "HF Long/Short Debt",
          "Long/Short Equity Hedge", "Closed-end funds", "HF U.S. Long/Short Equity",
          "HF U.S. Small Cap Long/Sh", "HF Global Long/Short Equi")
      strategy2="Long/Short";

    when ("Equity Market Neutral", "Equity Market Neutral - Q", "Equity Market Neutral - V",
          "Statistical Arbitrage", "HF Equity Market Neutral")
      strategy2="Market-Neutral";

    when ("Equity Long Only", "Equity Long Only - Growth", "Equity Long Only - Opport",
          "Equity Long Only - Quanti", "Equity Long Only - Tradin", "Equity Long Only - Value",
          "Mutual Funds/ETFs", "HF Long-Only Debt", "HF Long-Only Equity", "HF Long-Only Other",
          "HF Bear Market Equity")
      strategy2="Long Only";

    when ("Convertible Arbitrage", "Convertible Arbitrage - C", "Convertible Arbitrage - V",
          "Fixed Income - ABS/Sec. L", "Fixed Income - Arbitrage", "Fixed Income - Asset-Back",
          "Fixed Income - Convertibl", "Fixed Income - Diversifie", "Fixed Income - High Yield",
          "Fixed Income - Insurance-", "Fixed Income - Long-Only", "Fixed Income - Long/Short",
          "Fixed Income - Mortgage B", "Fixed Income Arbitrage", "Option Strategies",
          "Options Strategy", "Volatility Trading", "Tail Risk", "HF Convertible Arbitrage",
          "HF Debt Arbitrage", "HF Distressed Securities", "HF Diversified Arbitrage",
          "HF Merger Arbitrage", "Relative Value", "HF Volatility")
      strategy2="Relative Value";

    when ("Sector - Energy", "Sector - Environment", "Sector - Farming", "Sector - Financial",
          "Sector - Health Care/Biot", "Sector - Metals/Mining", "Sector - Miscellaneous",
          "Sector - Natural Resource", "Sector - Real Estate", "Sector - Technology",
          "Technical - Agricultural", "Technical - Currency", "Technical - Diversified",
          "Technical - Energy", "Technical - Financial/Met", "Technical - Interest Rate")
      strategy2="Sector";

    when ("Equity Short-Bias", "Dedicated Short Bias", "Equity Dedicated Short")
      strategy2="Short Bias";

    /* Unlisted categories: leave strategy2 as it is. */
    otherwise;
  end;
run;
/* Keep non-duplicate fund rows with a non-blank first token in manager_name. */
data fund_performance_98_4;
  set fund_performance_98_4(where=(duplicate=0));
  first_name=scan(manager_name,1);
  if first_name='' then delete;
run;

/* Attach the first-name lookup columns; funds without a match are kept. */
proc sql;
  create table fund_performance_98_4 as
    select *
    from fund_performance_98_4 as fund
    left join firstname_nation as fn
      on fund.first_name=fn.first_name;
quit;

/* Manager-level gender table for rows with a company name.
   last_name is derived here from manager_name: it was never created in this section, so the
   later KEEP and the BY companyname first_name last_name de-duplication had no such column
   to work with. */
data homogender;
  set fund_performance_98_4;
  where companyname not="";
  last_name=scan(manager_name,-1);
  keep companyname first_name last_name gender;
run;

/* One row per company / first name / last name. */
data homogender1;
  set homogender;
  keep companyname first_name last_name gender;
run;
proc sort data=homogender1 nodupkeys;
  by companyname first_name last_name;
run;

/* Save the de-duplicated table to the network library. */
libname network 'd:\research\networking';
data network.homogender2;
  set homogender1;
run;


/*** table 1 summary stats ***/
/* Number of distinct funds per country of origin. */
data originlist;
  set homoorigin;
  where country not="";
run;
/* One row per country / fund. */
proc sort data=originlist nodupkeys;
  by country fund_id;
run;
data originlist;
  set originlist;
  count1=1;
run;
/* totalcount1 = number of country / fund rows in each country, attached to every row. */
proc sql;
  create table originlist1 as
  select *, sum(count1) as totalcount1
  from originlist
  group by country;
quit;
/* Reduce to one row per country first, then rank by count. De-duplicating directly on
   totalcount1 would keep a single row for all countries that tie on the same count. */
proc sort data=originlist1 nodupkeys;
  by country;
run;
proc sort data=originlist1;
  by descending totalcount1;
run;

/* Number of distinct managers (first name / last name) per country of origin. */
data originlist;
  set homoorigin;
  where country not="";
run;
/* Distinct managers overall. */
proc sort data=originlist out=managers nodupkeys;
  by first_name last_name;
run;
/* Distinct managers within each country. */
proc sort data=originlist out=originlist2 nodupkeys;
  by country first_name last_name;
run;
data originlist2;
  set originlist2;
  count1=1;
run;
/* totalcount1 = number of country / manager rows in each country, attached to every row. */
proc sql;
  create table originlist2 as
  select *, sum(count1) as totalcount1
  from originlist2
  group by country;
quit;
/* Reduce to one row per country first, then rank by count. De-duplicating directly on
   totalcount1 would keep a single row for all countries that tie on the same count. */
proc sort data=originlist2 nodupkeys;
  by country;
run;
proc sort data=originlist2;
  by descending totalcount1;
run;


/* One row per company / first name / last name on the manager side. */
proc sort nodupkeys data=homoorigin;
  by companyname first_name last_name;
run;

/* Colleague side: country becomes country1 and the first name is copied to
   first_name_match so it survives the join below. */
data homoorigin1;
  set homoorigin1(rename=(country=country1));
  first_name_match=first_name;
run;

/* Pair every manager with every colleague row of the same company. */
proc sql;
  create table homoorigin_match as
    select *
    from homoorigin as mgr inner join homoorigin1 as peer
      on mgr.companyname=peer.companyname;
quit;

/* Keep pairs with a different first name and the same non-blank country; flag and count them. */
data homoorigin_match;
  set homoorigin_match;
  if first_name_match ne first_name and country=country1 and country ne "";
  homo_country=1;
  count=1;
run;

/* One row per manager who has at least one such colleague. */
proc sort data=homoorigin_match nodupkeys;
  by companyname first_name last_name;
run;

/* sumhomo_origin = number of those managers in the company; one row per company is kept. */
proc sql;
  create table homoorigin_match1 as
    select *, sum(count) as sumhomo_origin
    from homoorigin_match
    group by companyname;
quit;
proc sort nodupkeys data=homoorigin_match1;
  by companyname;
run;
/* Firm, manager name and work start/end years, for rows where firm and both names are present. */
data manager_time(keep=firmname first_name last_name startyear_work endyear_work);
  set network_combine(where=(first_name ne "" and last_name ne "" and firmname ne ''));
run;
/* Fund rows with first/last tokens of manager_name and return_pre, i.e. return with the
   management fee and the incentive-fee share of return (both divided by 100) added back.
   Rows missing either name token or the company name are discarded. */
data hf_performance;
  set fund_performance_98_4;
  first=scan(manager_name,1);
  last=scan(manager_name,-1);
  return_pre=return+managementfee/100+incentivefee/100*return;
  if first ne "" and last ne "" and companyname ne '';
run;

/* Bring in the work start/end years for the same firm and manager; unmatched funds are kept. */
proc sql;
  create table hf_performance as
    select *
    from hf_performance as perf
    left join manager_time as tenure
      on perf.companyname=tenure.firmname
     and perf.first=tenure.first_name
     and perf.last=tenure.last_name;
quit;

/* Remove fund-years that fall outside the matched manager's work window.
   A row is flagged (drop=1) only when the manager matched and both startyear_work and
   endyear_work are known; rows without tenure information are always retained.
   The upper bound is endyear_work: comparing against startyear_work on both sides would
   discard every year except the start year. */
data hf_performance;
  set hf_performance;
  if first=first_name and last=last_name
     and endyear_work not=. and startyear_work not=.
     and (year<startyear_work or year>endyear_work) then drop=1;
  else drop=0;
run;

data hf_performance;
  set hf_performance;
  where drop=0;
run;
/* Company / year / manager rows with a unit counter, for rows with a company name. */
data homoorigin(keep=companyname year first_name last_name count);
  set hf_performance(where=(companyname ne ""));
  count=1;
run;

/* One row per company / year / first name. */
proc sort data=homoorigin nodupkeys;
  by companyname year first_name;
run;

/* totalmanager = number of those rows per company-year; then one row per company-year. */
proc sql;
  create table homoorigin as
    select *, sum(count) as totalmanager
    from homoorigin
    group by companyname, year;
quit;
proc sort data=homoorigin nodupkeys;
  by companyname year;
run;

/* Attach the company-level same-origin count. */
proc sql;
  create table homoorigin as
    select *
    from homoorigin as firm
    left join homoorigin_match1 as same
      on firm.companyname=same.companyname;
quit;

/* Ratio capped at 1; companies without a same-origin count get 0. */
data homoorigin;
  set homoorigin;
  if sumhomo_origin=. then homoorigin_ratio=0;
  else homoorigin_ratio=min(1,sumhomo_origin/totalmanager);
run;

/* Names from homoorigin rows with totalmanager >= 3, one row per company / first / last name,
   exported as a CSV. */
proc sort data=homoorigin(where=(totalmanager>=3)) out=test nodupkeys;
  by companyname first_name last_name;
run;
data test(keep=companyname first_name last_name);
  set test;
run;
proc export outfile='d:\research\networking\managerlist_origin.csv'
            data=test
            dbms=csv replace;
run;

/* Distribution of homoorigin_ratio with one row per company ... */
proc sort data=homoorigin out=test nodupkeys;
  by companyname;
run;
proc means data=test n mean std min p25 median p75 max;
  var homoorigin_ratio;
run;

/* ... and with one row per company-year. */
proc sort data=homoorigin out=test1 nodupkeys;
  by companyname year;
run;
proc means data=test1 n mean std min p25 median p75 max;
  var homoorigin_ratio;
run;


/*** attach the company-year homophily measures to the hf_performance file (survivorship bias) ***/
proc sql;
  create table hf_performance_homoorigin as
    select *
    from hf_performance as perf inner join homoorigin as homo
      on perf.companyname=homo.companyname
     and perf.year=homo.year;
quit;

/*** add the seven factors, matched on year and month, for the rolling-window regressions ***/
proc import out=sevenfactors datafile="d:\research\testosterone\7factors.csv" dbms=CSV replace;
  guessingrows=500;
  getnames=yes;
run;

/* Numeric year/month key. */
data hf_performance_homoorigin;
  set hf_performance_homoorigin;
  date=100*year+month;
run;

proc sql;
  create table hf_performance_origin_7factor as
    select *
    from hf_performance_homoorigin as perf inner join sevenfactors as fac
      on perf.year=fac.year
     and perf.month=fac.month;
quit;

proc sort data=hf_performance_origin_7factor nodupkeys;
by fund_id year month;
run;
data hf_performance_origin_7factor1;
set hf_performance_origin_7factor;
keep return ptfsbd ptfsfx ptfscom mktrf smb tenyrcmtchange baacreditchange fund_id yyyymm return_pre rf sp500;
run;

proc export data=hf_performance_origin_7factor1
outfile='d:\research\networking\hf_performance_origin_7factor.csv'
dbms=csv
replace;
run;

data hf_performance_origin_7factor2;
set hf_performance_origin_7factor;
keep return ptfsbd ptfsfx ptfscom mktrf smb tenyrcmtchange baacreditchange fund_id yyyymm size return_pre rf ;
run;

proc export data=hf_performance_origin_7factor2
outfile='d:\research\networking\hf_performance_origin_group.csv'
dbms=csv
replace;
run;

/* Read back the three externally produced rolling-regression files. */
proc import datafile="d:\research\networking\rolling_alpha_origin.csv"
    dbms=CSV out=rolling_alpha replace;
    getnames=yes;
    guessingrows=50000;
run;

proc import datafile="d:\research\networking\rolling_alpha_origin_pre.csv"
    dbms=CSV out=rolling_alpha_pre replace;
    getnames=yes;
    guessingrows=50000;
run;

proc import datafile="d:\research\networking\rolling_alpha_origin_capm.csv"
    dbms=CSV out=rolling_alpha_capm replace;
    getnames=yes;
    guessingrows=50000;
run;

proc sort data=rolling_alpha;
    by fund_id yyyymm;
run;

/* Previous-row size within each fund (missing on a fund's first row), and
   the flow implied by size growth beyond the period return. */
data rolling_alpha(drop=i count);
    set rolling_alpha;
    by fund_id;
    array x(*) estimatedassets_lag1;

    estimatedassets_lag1 = lag1(size);

    /* count = row number inside the fund; blank any lag that would reach
       back into the previous fund */
    if first.fund_id then count = 1;
    do i = count to dim(x);
        x(i) = .;
    end;
    count + 1;

    flow = (size - estimatedassets_lag1*(1+return)) / estimatedassets_lag1;
run;

/* %winsor is defined outside this section of the file. */
%winsor(dsetin=rolling_alpha, byvar=none, vars=flow, type=winsor, pctl=0.2 99.8);run;

/* Left-join each rolling-regression file onto the fund-month panel
   (date on the panel side equals yyyymm on the regression side). */
proc sql;
    create table hf_performance_homoorigin as
    select *
    from hf_performance_homoorigin as a
    left join rolling_alpha as b
        on a.fund_id = b.fund_id and a.date = b.yyyymm;
quit;

proc sql;
    create table hf_performance_homoorigin as
    select *
    from hf_performance_homoorigin as a
    left join rolling_alpha_pre as b
        on a.fund_id = b.fund_id and a.date = b.yyyymm;
quit;

proc sql;
    create table hf_performance_homoorigin as
    select *
    from hf_performance_homoorigin as a
    left join rolling_alpha_capm as b
        on a.fund_id = b.fund_id and a.date = b.yyyymm;
quit;

/* One row per fund-month. */
proc sort data=hf_performance_homoorigin nodupkey;
    by fund_id date;
run;

/* Monthly alphas: return minus the factor exposures from each regression
   file (_b_* from rolling_alpha, _b_*1 from the "pre" file, _b_mktrf2 from
   the CAPM file). skill scales the return_pre-based alpha by lagged size.
   Rows with strategy "Fund of Funds" are excluded. */
data hf_performance_homoorigin;
    set hf_performance_homoorigin;
    where strategy ne "Fund of Funds";

    alpha = return
            - _b_ptfsbd*ptfsbd - _b_ptfsfx*ptfsfx - _b_ptfscom*ptfscom
            - _b_mktrf*mktrf - _b_smb*smb
            - _b_tenyrcmtchange*tenyrcmtchange
            - _b_baacreditchange*baacreditchange;

    alpha_pre = return
            - _b_ptfsbd1*ptfsbd - _b_ptfsfx1*ptfsfx - _b_ptfscom1*ptfscom
            - _b_mktrf1*mktrf - _b_smb1*smb
            - _b_tenyrcmtchange1*tenyrcmtchange
            - _b_baacreditchange1*baacreditchange;

    alpha_capm = return - _b_mktrf2*mktrf;

    skill = (return_pre
            - _b_ptfsbd*ptfsbd - _b_ptfsfx*ptfsfx - _b_ptfscom*ptfscom
            - _b_mktrf*mktrf - _b_smb*smb
            - _b_tenyrcmtchange*tenyrcmtchange
            - _b_baacreditchange*baacreditchange) * estimatedassets_lag1;
run;

proc sort data=hf_performance_homoorigin;
    by fund_id manager_name;
run;

/* Full-sample mean and dispersion per fund-manager, re-merged onto every row. */
proc sql;
    create table hf_performance_homoorigin1 as
    select *,
           mean(return) as meanreturn,
           mean(flow)   as meanflow,
           mean(alpha)  as meanalpha,
           std(return)  as totrisk,
           std(alpha)   as idiorisk
    from hf_performance_homoorigin
    group by fund_id, manager_name;
quit;

/* Cross-sectional percentile of the homophily ratio within each date,
   rescaled from 0..99 to 0..0.99. */
proc sort data=hf_performance_homoorigin;
    by date;
run;

proc rank data=hf_performance_homoorigin
          out=hf_performance_homoorigin
          groups=100;
    by date;
    var homoorigin_ratio;
    ranks homoorigin_rank100;
run;

data hf_performance_homoorigin;
    set hf_performance_homoorigin;
    homoorigin_rank100 = homoorigin_rank100 / 100;
run;

/* Back to fund-month order, one row per key. */
proc sort data=hf_performance_homoorigin nodupkey;
    by fund_id date;
run;
/* Per-fund row counter (seq), 24-row window index (seq1), fund age in
   months, gross return, and a combined fund/window key. */
data hf_performance_meanorigin;
    set hf_performance_homoorigin;
    by fund_id;
    retain seq;
    if first.fund_id then seq = 0;
    seq + 1;

    seq1       = ceil(seq/24);
    incepyear  = int(inceptiondate/100);
    incepmonth = inceptiondate - int(inceptiondate/100)*100;
    age_hf     = (year*12 + month) - (incepyear*12 + incepmonth);

    ret_1       = return + 1;
    fund_id_seq = fund_id || seq1;
run;

/*** compute risk and mean performance for every 24 months ***/

/* Compounded return inside each fund/window, plus markers for negative
   returns. (A missing return also satisfies return<0 in SAS.) */
data hf_performance_meanorigin;
    set hf_performance_meanorigin;
    by fund_id_seq;
    retain cum;
    if first.fund_id_seq then cum = 1;
    cum = cum * ret_1;

    cum_loss = cum - 1;

    if return < 0 then do;
        ret_neg   = return;
        count_neg = 1;
    end;
    else ret_neg = .;
run;


/* Per fund and 24-row window (fund_id, seq1): means and standard deviations
   of return/flow/alpha, worst single return (maxloss), lowest compounded
   return (maxdrawdown), sum of squared negative returns, count of negative
   returns, mppm and the largest _eq2_r2. Because the select list contains *,
   PROC SQL re-merges these group statistics onto every detail row. */
/* NOTE(review): the -6 multiplier looks like the manipulation-proof
   performance measure with risk aversion 3 on monthly data, whose summand
   would be ((1+return)/(1+rf))**(-2); the expression below uses
   (1+return)*sqrt(1+rf) instead - confirm the intended formula. */
proc sql; 
create table hf_performance_meanorigin as
select *, mean(return) as meanreturn,mean(flow) as meanflow, mean(alpha) as meanalpha, std(return) as totrisk, std(alpha) as idiorisk, 
min(return) as maxloss, min(cum_loss) as maxdrawdown, sum(ret_neg*ret_neg) as sumretneg, sum(count_neg) as negtotcount, -6*log(mean((1+return)*sqrt(1+rf))) as mppm, max(_eq2_r2) as maxrsq
from hf_performance_meanorigin
group by fund_id, seq1;
quit;

/* Post-aggregation adjustments and downside statistics. */
data hf_performance_meanorigin;
set hf_performance_meanorigin;
/* NOTE(review): rows with homoorigin_rank100 above 0.8 have 0.002 subtracted
   from return and alpha. This happens after the window statistics above were
   computed and changes the series used by every later step (lags, leads,
   portfolio returns). No rationale is visible in this section - confirm and
   document. */
if homoorigin_rank100>0.8 then return=return-0.002;
if homoorigin_rank100>0.8 then alpha=alpha-0.002;
/* root mean square of the negative returns in the window; missing when the
   window has no negative return (negtotcount is missing) */
neg_dev=sqrt(sumretneg/negtotcount);
/* share of negative months, always divided by 24 even for shorter windows */
neg_percent=negtotcount/24;
/* return rounded to 3 decimals, used below to detect repeated values */
ret2=round(return,0.001);
run;

/* Rows whose rounded return (ret2) occurs more than once inside the same
   fund/window; Countret is the number of occurrences. */
proc sql;
    create table hf_performance_meanorigin_dup as
    select *,
           count(ret2) as Countret
    from hf_performance_meanorigin
    group by fund_id, seq1, ret2
    having countret > 1;
quit;

/* Reduce to one (fund_id_seq, countret) row per fund/window. */
data hf_performance_meanorigin_dup;
    set hf_performance_meanorigin_dup;
    keep fund_id_seq countret;
run;

proc sort data=hf_performance_meanorigin_dup nodupkey;
    by fund_id_seq;
run;

/* Attach countret to the panel; windows without repeats get missing. */
proc sql;
    create table hf_performance_meanorigin as
    select *
    from hf_performance_meanorigin as a
    left join hf_performance_meanorigin_dup as b
        on a.fund_id_seq = b.fund_id_seq;
quit;



proc sort data=hf_performance_meanorigin;
    by fund_id date;
run;

/* return_lag1-return_lag48: the return 1..48 rows earlier within the same
   fund, missing where the fund has fewer prior rows.

   This step used to be run twice. The input still carries the constant
   `count` column created when homoorigin was built, and a SET statement
   reloads it on every row, so a `count`-based reset loop blanks all 48 lags
   on the first pass; only the second pass (after `count` had been dropped)
   produced usable values. The history is now kept in a temporary array that
   is cleared at the start of each fund, so a single pass is enough and no
   variable name can collide with an input column. `count` is still dropped
   so the output has the same columns as before. */
data hf_performance_meanorigin(drop=i count);
    set hf_performance_meanorigin;
    by fund_id;
    array x(*) return_lag1-return_lag48;
    array hist(48) _temporary_;   /* hist(k) = return k rows back in this fund */

    if first.fund_id then do i = 1 to dim(hist);
        hist(i) = .;
    end;

    do i = 1 to dim(x);
        x(i) = hist(i);
    end;

    /* push the current return onto the history for the following rows */
    do i = dim(hist) to 2 by -1;
        hist(i) = hist(i-1);
    end;
    hist(1) = return;
run;
/* alpha_lag1-alpha_lag48: alpha 1..48 rows earlier within the same fund,
   missing where the fund has fewer prior rows. */
data hf_performance_meanorigin(drop=i);
    set hf_performance_meanorigin;
    by fund_id;
    array x(*) alpha_lag1-alpha_lag48;
    array hist(48) _temporary_;   /* hist(k) = alpha k rows back in this fund */

    if first.fund_id then do i = 1 to dim(hist);
        hist(i) = .;
    end;

    do i = 1 to dim(x);
        x(i) = hist(i);
    end;

    do i = dim(hist) to 2 by -1;
        hist(i) = hist(i-1);
    end;
    hist(1) = alpha;
run;
/* alpha_capm_lag1-alpha_capm_lag24: CAPM alpha 1..24 rows earlier within the
   same fund, missing where the fund has fewer prior rows. */
data hf_performance_meanorigin(drop=i);
    set hf_performance_meanorigin;
    by fund_id;
    array x(*) alpha_capm_lag1-alpha_capm_lag24;
    array hist(24) _temporary_;

    if first.fund_id then do i = 1 to dim(hist);
        hist(i) = .;
    end;

    do i = 1 to dim(x);
        x(i) = hist(i);
    end;

    do i = dim(hist) to 2 by -1;
        hist(i) = hist(i-1);
    end;
    hist(1) = alpha_capm;
run;

/* flow_lag1-flow_lag36: flow 1..36 rows earlier within the same fund,
   missing where the fund has fewer prior rows. */
data hf_performance_meanorigin(drop=i);
    set hf_performance_meanorigin;
    by fund_id;
    array x(*) flow_lag1-flow_lag36;
    array hist(36) _temporary_;

    if first.fund_id then do i = 1 to dim(hist);
        hist(i) = .;
    end;

    do i = 1 to dim(x);
        x(i) = hist(i);
    end;

    do i = dim(hist) to 2 by -1;
        hist(i) = hist(i-1);
    end;
    hist(1) = flow;
run;
/* Trailing averages over the lag columns. MEAN() skips missing lags, so the
   average is taken over whatever history is available.
   The former drop=i count output option was removed: neither variable exists
   at this point (the lag steps already drop them), so it only produced
   "never been referenced" warnings in the log. */
data hf_performance_meanorigin;
    set hf_performance_meanorigin;

    lag12return     = mean(of return_lag1-return_lag12);
    lag12alpha      = mean(of alpha_lag1-alpha_lag12);
    lag24return     = mean(of return_lag1-return_lag24);
    lag36return     = mean(of return_lag1-return_lag36);
    lag48return     = mean(of return_lag1-return_lag48);
    lag24alpha      = mean(of alpha_lag1-alpha_lag24);
    lag36alpha      = mean(of alpha_lag1-alpha_lag36);
    lag48alpha      = mean(of alpha_lag1-alpha_lag48);
    lag24alpha_capm = mean(of alpha_capm_lag1-alpha_capm_lag24);

    /* months 13-24 back only */
    lag1324return   = mean(of return_lag13-return_lag24);
    lag1324alpha    = mean(of alpha_lag13-alpha_lag24);

    lag12flow       = mean(of flow_lag1-flow_lag12);
    lag24flow       = mean(of flow_lag1-flow_lag24);
    lag36flow       = mean(of flow_lag1-flow_lag36);
    lag1324flow     = mean(of flow_lag13-flow_lag24);
run;

/* Leads are built as lags on the reversed time order: with dates descending
   inside each fund, "k rows earlier" is the observation k rows later in
   calendar time. */
proc sort data=hf_performance_meanorigin;
    by fund_id descending date;
run;

/* return_lead1-return_lead48, missing where the fund has fewer later rows. */
data hf_performance_meanorigin(drop=i);
    set hf_performance_meanorigin;
    by fund_id;
    array x(*) return_lead1-return_lead48;
    array ahead(48) _temporary_;   /* ahead(k) = return k rows later in time */

    if first.fund_id then do i = 1 to dim(ahead);
        ahead(i) = .;
    end;

    do i = 1 to dim(x);
        x(i) = ahead(i);
    end;

    do i = dim(ahead) to 2 by -1;
        ahead(i) = ahead(i-1);
    end;
    ahead(1) = return;
run;
/* alpha_lead1-alpha_lead48 (input is in descending date order within fund),
   missing where the fund has fewer later rows. */
data hf_performance_meanorigin(drop=i);
    set hf_performance_meanorigin;
    by fund_id;
    array x(*) alpha_lead1-alpha_lead48;
    array ahead(48) _temporary_;

    if first.fund_id then do i = 1 to dim(ahead);
        ahead(i) = .;
    end;

    do i = 1 to dim(x);
        x(i) = ahead(i);
    end;

    do i = dim(ahead) to 2 by -1;
        ahead(i) = ahead(i-1);
    end;
    ahead(1) = alpha;
run;

/* flow_lead1-flow_lead12 (input is in descending date order within fund),
   missing where the fund has fewer later rows. */
data hf_performance_meanorigin(drop=i);
    set hf_performance_meanorigin;
    by fund_id;
    array x(*) flow_lead1-flow_lead12;
    array ahead(12) _temporary_;

    if first.fund_id then do i = 1 to dim(ahead);
        ahead(i) = .;
    end;

    do i = 1 to dim(x);
        x(i) = ahead(i);
    end;

    do i = dim(ahead) to 2 by -1;
        ahead(i) = ahead(i-1);
    end;
    ahead(1) = flow;
run;
/* Forward-looking averages over the lead columns (missing leads skipped).
   The former drop=i count output option was removed: neither variable exists
   here, so it only generated log warnings. */
data hf_performance_meanorigin;
    set hf_performance_meanorigin;

    lead12alpha  = mean(of alpha_lead1-alpha_lead12);
    lead24alpha  = mean(of alpha_lead1-alpha_lead24);
    lead36alpha  = mean(of alpha_lead1-alpha_lead36);
    lead48alpha  = mean(of alpha_lead1-alpha_lead48);

    lead12return = mean(of return_lead1-return_lead12);
    lead24return = mean(of return_lead1-return_lead24);
    lead36return = mean(of return_lead1-return_lead36);
    lead48return = mean(of return_lead1-return_lead48);
run;

/* Restore ascending fund-month order, one row per key. */
proc sort data=hf_performance_meanorigin nodupkey;
    by fund_id date;
run;
/* Fund-month extract for the portfolio analysis. */
data hf_performance_meanorigin1;
    set hf_performance_meanorigin;
    keep
        /* identifiers and calendar */
        fund_id fund_id_tass fund_id_hfr companyname name first_name last_name
        date year month incepyear incepmonth age_hf
        /* fund characteristics */
        strategy strategy2 primarycategory managementfee incentivefee
        highwatermark lockupperiod leveraged redemptionfrequency
        redemptionnoticeperiod size estimatedassets_lag1 totalmanager
        /* homophily measures */
        homoorigin_ratio homoorigin_rank100
        /* performance */
        return return_pre alpha alpha_pre flow skill _eq2_r2
        lag24return lag36return lag48return
        lag24alpha lag36alpha lag48alpha lag24alpha_capm lag36flow
        lead24return lead36return lead48return
        lead12alpha lead24alpha lead36alpha lead48alpha return_lead12
        alpha_lag1-alpha_lag36 alpha_lead1-alpha_lead36 flow_lag1-flow_lag12;
run;

/* Permanent copy (includes last_name). */
data network.hf_performance_portfolio_origin;
    set hf_performance_meanorigin1;
run;

/* The CSV version carries every column except last_name. */
data hf_performance_meanorigin1;
    set hf_performance_meanorigin1;
    drop last_name;
run;

proc export data=hf_performance_meanorigin1
    outfile='d:\research\networking\hf_performance_portfolio_origin.csv'
    dbms=csv
    replace;
run;

/* Collapse the panel to one row per fund and 24-row window (this overwrites
   hf_performance_meanorigin), then export the window-level statistics. */
proc sort data=hf_performance_meanorigin nodupkey;
    by fund_id seq1;
run;

data hf_performance_meanorigin2;
    set hf_performance_meanorigin;
    keep companyname fund_id seq1 year age_hf strategy size totalmanager
         homoorigin_ratio
         managementfee incentivefee highwatermark lockupperiod leveraged
         redemptionfrequency redemptionnoticeperiod
         meanreturn meanalpha totrisk idiorisk maxloss maxdrawdown
         neg_dev neg_percent negtotcount countret mppm rf skill
         maxrsq _eq2_r2;
run;

proc export data=hf_performance_meanorigin2
    outfile='d:\research\networking\hf_performance_meanorigin.csv'
    dbms=csv
    replace;
run;


/* Reload the fund-month extract and keep rows with at least two managers. */
proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv"
    dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

data hf_performance_portfolio;
    set hf_performance_portfolio;
    where totalmanager >= 2;
run;

/* One row per fund: distribution of 1 - homoorigin_ratio, overall and by
   strategy. */
proc sort data=hf_performance_portfolio out=funds nodupkey;
    by fund_id;
run;

data funds;
    set funds;
    diver_origin_ratio = 1 - homoorigin_ratio;
run;

proc means data=funds N mean p25 median p75 std;
    var diver_origin_ratio;
run;

proc means data=funds N mean p25 median p75 std;
    class strategy;
    var diver_origin_ratio;
run;

/* Distinct-fund counts (read from the log) for the full sample and for the
   two sub-periods split at 2004; funds is overwritten each time. */
proc sort data=hf_performance_portfolio out=funds nodupkey;
    by fund_id;
run;

proc sort data=hf_performance_portfolio;
    by year month;
run;

data hf_performance_portfolio_early;
    set hf_performance_portfolio;
    where year <= 2004;
run;

proc sort data=hf_performance_portfolio_early out=funds nodupkey;
    by fund_id;
run;

data hf_performance_portfolio_late;
    set hf_performance_portfolio;
    where year > 2004;
run;

proc sort data=hf_performance_portfolio_late out=funds nodupkey;
    by fund_id;
run;

/* Yearly terciles of the homophily ratio, formed on January rows and
   excluding the boundary values 0 and 1. */
proc rank data=hf_performance_portfolio
          out=hf_performance_portfolio1
          groups=3;
    where month = 1 and homoorigin_ratio ne 1 and homoorigin_ratio ne 0;
    by year;
    var homoorigin_ratio;
    ranks homoorigin_rank;
run;

/* Flip the 0..2 rank so the highest tercile becomes portfolio 2 and the
   lowest becomes portfolio 4. */
data hf_performance_portfolio1;
    set hf_performance_portfolio1;
    homoorigin_rank1 = 4 - homoorigin_rank;
    keep fund_id year homoorigin_rank1;
run;

/* Carry the January assignment to every month of the same fund-year. */
proc sql;
    create table hf_performance_portfolio as
    select *
    from hf_performance_portfolio as a
    left join hf_performance_portfolio1 as b
        on a.fund_id = b.fund_id and a.year = b.year;
quit;

/* Ratio 1 is portfolio 1 and ratio 0 is portfolio 5; rows that still have
   no portfolio are discarded. */
data hf_performance_portfolio;
    set hf_performance_portfolio;
    if homoorigin_ratio = 1 then homoorigin_rank1 = 1;
    if homoorigin_ratio = 0 then homoorigin_rank1 = 5;
    if homoorigin_rank1 ne .;
    countyr = 1;
run;
/* For one portfolio (homoorigin_rank1 = &rank): count the distinct funds in
   each year (totcountyr), keep one row per year in sort&rank, and summarize
   the yearly counts. */
%macro origin_rank_counts(rank);
    data sort&rank.;
        set hf_performance_portfolio;
        where homoorigin_rank1 = &rank.;
    run;

    proc sort data=sort&rank. nodupkey;
        by year fund_id;
    run;

    proc sql;
        create table sort&rank. as
        select *,
               sum(countyr) as totcountyr
        from sort&rank.
        group by year;
    quit;

    proc sort data=sort&rank. nodupkey;
        by year;
    run;

    proc means data=sort&rank.;
        var totcountyr;
    run;
%mend origin_rank_counts;

%origin_rank_counts(1)
%origin_rank_counts(2)
%origin_rank_counts(3)
%origin_rank_counts(4)
%origin_rank_counts(5)
/* Equal-weighted mean return and alpha per portfolio-month, then one row per
   portfolio-month (the other columns come from whichever row is kept). */
proc sql;
    create table hf_performance_portfolio as
    select *,
           mean(return) as meanreturn,
           mean(alpha)  as meanalpha
    from hf_performance_portfolio
    group by homoorigin_rank1, year, month;
quit;

proc sort data=hf_performance_portfolio nodupkey;
    by homoorigin_rank1 year month;
run;

/* Working copy without any factor columns, which are re-attached later. */
data hf_performance_portfolio10;
    set hf_performance_portfolio;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
run;
proc import datafile="d:\research\testosterone\7factors.csv"
    dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/* Three copies of the factor table, each active in one sub-period only and
   zero elsewhere; the suffix 1/2/3 identifies the sub-period.
     1: yyyymm <= 200004
     2: 200004 < yyyymm <= 200809
     3: yyyymm > 200809 */
data sevenfactors1;
    set sevenfactors;
    array fac(*) mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm > 200004 then do _k = 1 to dim(fac);
        fac(_k) = 0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf1 smb=smb1 tenyrcmtchange=tenyrcmtchange1
           baacreditchange=baacreditchange1
           ptfsbd=ptfsbd1 ptfsfx=ptfsfx1 ptfscom=ptfscom1;
run;

data sevenfactors2;
    set sevenfactors;
    array fac(*) mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm <= 200004 or yyyymm > 200809 then do _k = 1 to dim(fac);
        fac(_k) = 0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf2 smb=smb2 tenyrcmtchange=tenyrcmtchange2
           baacreditchange=baacreditchange2
           ptfsbd=ptfsbd2 ptfsfx=ptfsfx2 ptfscom=ptfscom2;
run;

data sevenfactors3;
    set sevenfactors;
    array fac(*) mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm <= 200809 then do _k = 1 to dim(fac);
        fac(_k) = 0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf3 smb=smb3 tenyrcmtchange=tenyrcmtchange3
           baacreditchange=baacreditchange3
           ptfsbd=ptfsbd3 ptfsfx=ptfsfx3 ptfscom=ptfscom3;
run;

proc import datafile="d:\research\networking\anomaly11.csv"
    dbms=CSV out=anomaly11 replace;
    getnames=yes;
    guessingrows=500;
run;
/* yyyymm key for the factor joins. */
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    date = year*100 + month;
run;

/* Attach, month by month: the full factor table, the three sub-period factor
   tables, and the anomaly table. Each step is an inner join on date = yyyymm. */
proc sql;
    create table hf_performance_portfolio10 as
    select *
    from hf_performance_portfolio10 as p
    inner join sevenfactors as f
        on p.date = f.yyyymm;
quit;

proc sql;
    create table hf_performance_portfolio10 as
    select *
    from hf_performance_portfolio10 as p
    inner join sevenfactors1 as f
        on p.date = f.yyyymm;
quit;

proc sql;
    create table hf_performance_portfolio10 as
    select *
    from hf_performance_portfolio10 as p
    inner join sevenfactors2 as f
        on p.date = f.yyyymm;
quit;

proc sql;
    create table hf_performance_portfolio10 as
    select *
    from hf_performance_portfolio10 as p
    inner join sevenfactors3 as f
        on p.date = f.yyyymm;
quit;

proc sql;
    create table hf_performance_portfolio10 as
    select *
    from hf_performance_portfolio10 as p
    inner join anomaly11 as f
        on p.date = f.yyyymm;
quit;

/* Portfolio excess return over rf; rows whose strategy is "Fund of Funds"
   are excluded. */
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    where strategy ne "Fund of Funds";
    excessreturn = meanreturn - rf;
run;
/* The table was just rebuilt by a chain of PROC SQL joins, which do not
   guarantee row order, while the BY-group procedures below require the data
   to be grouped by homoorigin_rank1. The sort that used to sit just before
   the export is therefore done first; nothing modifies the table in between,
   so the exported file is unchanged. */
proc sort data=hf_performance_portfolio10;
    by homoorigin_rank1;
run;

/* Mean excess return and one-sample t-tests per portfolio. */
proc means data=hf_performance_portfolio10;
    var excessreturn;
    by homoorigin_rank1;
run;

proc ttest data=hf_performance_portfolio10;
    var excessreturn;
    by homoorigin_rank1;
run;

/* Two-sample test of mean alpha, portfolio 1 versus portfolio 5. */
proc ttest data=hf_performance_portfolio10;
    var meanalpha;
    class homoorigin_rank1;
    where homoorigin_rank1 in (1, 5);
run;

/* Sub-period means (split at 2004). */
proc means data=hf_performance_portfolio10;
    var excessreturn;
    by homoorigin_rank1;
    where year <= 2004;
run;

proc means data=hf_performance_portfolio10;
    var excessreturn;
    by homoorigin_rank1;
    where year > 2004;
run;

/* One-sample t-tests: full sample, early, late. */
proc ttest data=hf_performance_portfolio10;
    var excessreturn;
    by homoorigin_rank1;
run;

proc ttest data=hf_performance_portfolio10;
    var excessreturn;
    by homoorigin_rank1;
    where year <= 2004;
run;

proc ttest data=hf_performance_portfolio10;
    var excessreturn;
    by homoorigin_rank1;
    where year > 2004;
run;

/* Portfolio 1 versus 5 on excess return: full sample, early, late. */
proc ttest data=hf_performance_portfolio10;
    var excessreturn;
    class homoorigin_rank1;
    where homoorigin_rank1 in (1, 5);
run;

data hf_performance_portfolio10_early;
    set hf_performance_portfolio10;
    where year <= 2004;
run;

proc ttest data=hf_performance_portfolio10_early;
    var excessreturn;
    class homoorigin_rank1;
    where homoorigin_rank1 in (1, 5);
run;

data hf_performance_portfolio10_late;
    set hf_performance_portfolio10;
    where year > 2004;
run;

proc ttest data=hf_performance_portfolio10_late;
    var excessreturn;
    class homoorigin_rank1;
    where homoorigin_rank1 in (1, 5);
run;

proc export data=hf_performance_portfolio10
    outfile='d:\research\networking\hf_performance_portfolio10_origin.csv'
    dbms=csv
    replace;
run;

/* Portfolio 1 rows (all columns), excess return renamed excessreturn1. */
data hf_performance_portfolio_1;
    set hf_performance_portfolio10;
    where homoorigin_rank1 = 1;
    rename excessreturn = excessreturn1;
run;

/* Portfolio 5 rows, reduced to date and excess return (as excessreturn5). */
data hf_performance_portfolio_5;
    set hf_performance_portfolio10;
    where homoorigin_rank1 = 5;
    keep excessreturn date;
    rename excessreturn = excessreturn5;
run;

/* Month-matched pair of portfolios. */
proc sql;
    create table hf_performance_portfolio_s as
    select *
    from hf_performance_portfolio_1 as lo
    inner join hf_performance_portfolio_5 as hi
        on lo.date = hi.date;
quit;

/* Spread: portfolio 5 minus portfolio 1. */
data hf_performance_portfolio_s;
    set hf_performance_portfolio_s;
    excessreturn_s = excessreturn5 - excessreturn1;
run;

proc export data=hf_performance_portfolio_s
    outfile='d:\research\networking\hf_performance_portfolio_s_origin.csv'
    dbms=csv
    replace;
run;


/*** termination (sample from hf_performance_portfolio.csv, education/work homophily) ***/
proc import datafile="d:\research\networking\hf_performance_portfolio.csv"
    dbms=CSV out=hf_performance1 replace;
    getnames=yes;
    guessingrows=5000;
run;

/* Drop-reason files from the two databases. */
proc import datafile="d:\research\fancycar\productdetailsdropreasons.txt"
    dbms=dlm out=dropreason_tass replace;
    getnames=yes;
    delimiter=',';
    guessingrows=5000;
run;

proc import datafile="d:\research\fancycar\performance_detail_hfr.csv"
    dbms=CSV out=dropreason_hfr replace;
    getnames=yes;
    guessingrows=5000;
run;

proc sort data=dropreason_hfr nodupkey;
    by fund_id_hfr;
run;

/* Keep fund-months that carry an identifier from at least one database. */
data hf_performance1;
    set hf_performance1;
    where fund_id_tass ne . or fund_id_hfr ne .;
run;

/* Rename the HFR dropreason column to dropreason1 before the joins. */
data dropreason_hfr;
    set dropreason_hfr;
    rename dropreason = dropreason1;
run;

proc sql;
    create table hf_performance1 as
    select *
    from hf_performance1 as a
    left join dropreason_tass as b
        on a.fund_id_tass = b.productreference;
quit;

proc sql;
    create table hf_performance1 as
    select *
    from hf_performance1 as a
    left join dropreason_hfr as b
        on a.fund_id_hfr = b.fund_id_hfr;
quit;

proc means data=hf_performance1 p25 p50 p75 p90;
    var homo_edu_ratio homo_work_ratio;
run;

/* NOTE(review): rows with homo_edu_ratio above 0.31 from 2015 onward are
   removed before the last-observation flag is set, which moves the apparent
   end date of those funds. No rationale is visible here - confirm. */
data hf_performance1;
    set hf_performance1;
    if homo_edu_ratio > 0.31 and year >= 2015 then delete;
    count1 = 1;
run;

proc sort data=hf_performance1;
    by fund_id date;
run;

/* dead = 1 on a fund's final row, 0 otherwise. */
data hf_performance1;
    set hf_performance1;
    by fund_id;
    dead = last.fund_id;
run;

/* First row of every fund supplies its start year and month. */
proc sort data=hf_performance1 out=hf_performance2 nodupkey;
    by fund_id;
run;

data hf_performance2;
    set hf_performance2;
    rename year = startyear
           month = startmonth;
run;

proc sql;
    create table hf_performance1 as
    select *
    from hf_performance1
    inner join hf_performance2
        on hf_performance1.fund_id = hf_performance2.fund_id;
quit;


proc export data=hf_performance1
    outfile='d:\research\networking\termination.csv'
    dbms=csv
    replace;
run;

/* Monthly termination rate: share of fund rows in each date that are the
   fund's final row and also carry a drop reason. */
proc sort data=hf_performance1 out=terminate nodupkey;
    by date fund_id;
run;

data terminate;
    set terminate;
    dead1     = (dropreasonid ne . and dead = 1);
    count_all = 1;
run;

proc sql;
    create table terminate as
    select *,
           sum(count_all) as totcountall,
           sum(dead1)     as totaldead
    from terminate
    group by date;
quit;

data terminate;
    set terminate;
    ratio = totaldead / totcountall;
run;

/* One row per date, then summarize the rate. */
proc sort data=terminate nodupkey;
    by date;
run;

proc means data=terminate;
    var ratio;
run;


/*** termination ***/
/* The drop-reason lookups are the same for every homophily measure, so they
   are loaded and prepared once and reused by each %termination_panel call. */
proc import datafile="d:\research\fancycar\productdetailsdropreasons.txt" dbms=dlm out=dropreason_tass replace;
    getnames=yes;
    delimiter=',';
    guessingrows=5000;
run;
proc import datafile="d:\research\fancycar\performance_detail_hfr.csv" dbms=CSV out=dropreason_hfr replace;
    getnames=yes;
    guessingrows=5000;
run;
/* One row per HFR fund id, so the HFR left join cannot multiply panel rows. */
proc sort data=dropreason_hfr nodupkeys;
    by fund_id_hfr;
run;
/* The HFR drop reason is carried into the panel under the name dropreason1. */
data dropreason_hfr;
    set dropreason_hfr(rename=(dropreason=dropreason1));
run;

/*
  %termination_panel: build and export the termination panel for one measure.

  suffix=        measure tag; reads  hf_performance_portfolio_<suffix>.csv
                 and writes termination_<suffix>.csv (both under
                 d:\research\networking).
  describe_var=  optional variable summarised with PROC MEANS (p25/p50/p75/p90)
                 before filtering; leave blank to skip.
  drop_if=       optional DATA step condition; matching records are deleted.
                 Pass it inside %str( ) so comparison operators are not
                 evaluated by the macro processor.

  Work data sets written: hf_performance1 (final panel), hf_performance2
  (first record per fund). Requires dropreason_tass and dropreason_hfr as
  prepared above.
*/
%macro termination_panel(suffix=, describe_var=, drop_if=);
    proc import datafile="d:\research\networking\hf_performance_portfolio_&suffix..csv" dbms=CSV out=hf_performance1 replace;
        getnames=yes;
        guessingrows=5000;
    run;

    /* Keep only records that carry a TASS or an HFR fund id. */
    data hf_performance1;
        set hf_performance1;
        where fund_id_tass not=. or fund_id_hfr not=.;
    run;

    proc sql;
        create table hf_performance1 as
        select *
        from hf_performance1 as a left join dropreason_tass as b
        on a.fund_id_tass=b.productreference;

        create table hf_performance1 as
        select *
        from hf_performance1 as a left join dropreason_hfr as b
        on a.fund_id_hfr=b.fund_id_hfr;
    quit;

    %if %length(&describe_var) > 0 %then %do;
        proc means data=hf_performance1 p25 p50 p75 p90;
            var &describe_var;
        run;
    %end;

    data hf_performance1;
        set hf_performance1;
        %if %length(%superq(drop_if)) > 0 %then %do;
            if %unquote(&drop_if) then delete;
        %end;
        count1=1;
    run;

    proc sort data=hf_performance1;
        by fund_id date;
    run;

    /* dead=1 on the last record of each fund in (fund_id, date) order, else 0. */
    data hf_performance1;
        set hf_performance1;
        by fund_id;
        dead=last.fund_id;
    run;

    /* First record of each fund; its year/month become startyear/startmonth. */
    proc sort data=hf_performance1 out=hf_performance2 nodupkeys;
        by fund_id;
    run;
    data hf_performance2;
        set hf_performance2(rename=(year=startyear month=startmonth));
    run;

    /* Only the start-date columns are taken from hf_performance2; all other
       columns already exist in hf_performance1, and "select *" over this
       self-join produced a duplicate-column warning for each of them. */
    proc sql;
        create table hf_performance1 as
        select a.*, b.startyear, b.startmonth
        from hf_performance1 as a inner join hf_performance2 as b
        on a.fund_id=b.fund_id;
    quit;

    proc export data=hf_performance1
        outfile="d:\research\networking\termination_&suffix..csv"
        dbms=csv
        replace;
    run;
%mend termination_panel;

/* NOTE(review): the drop_if filters below are hard-coded sample restrictions
   that condition on the homophily measure itself. Their reason is not visible
   in this file; confirm they are intended and documented. */
%termination_panel(suffix=origin, describe_var=homoorigin_ratio,
                   drop_if=%str(homoorigin_ratio=1 and year>=2016));

/* gender: the analogous filter (homo_gender_ratio=1 and year>=2016) was
   commented out in the original script and stays disabled. */
%termination_panel(suffix=gender);

%termination_panel(suffix=eth,
                   drop_if=%str(homoeth_ratio=1 and date>=201602));

%termination_panel(suffix=major);


/*** form ADV ***/
/*** cross-sectional ***/
/* One record per fund from the baseline portfolio file. */
proc import datafile="d:\research\networking\hf_performance_portfolio.csv" dbms=CSV out=hf_performance1 replace;
    getnames=yes;
    guessingrows=5000;
run;
proc sort data=hf_performance1 nodupkeys;
    by fund_id;
run;

/* Form ADV filings; _1A is used as the adviser key throughout. */
proc import datafile="d:\research\criminal\advbase1.csv" dbms=CSV out=advbase replace;
    getnames=yes;
    guessingrows=50000;
run;
proc sort data=advbase;
    by _1A;
run;

/* Lags of numofinvolves within adviser. LAG() runs over the whole data set,
   so the loop blanks lag k on the first k records of each _1A group to stop
   values leaking across advisers. */
data advbase(drop=i count);
    set advbase;
    by _1A;
    array x(*) numofinvolves_lag1-numofinvolves_lag2;
    numofinvolves_lag1=lag1(numofinvolves);
    numofinvolves_lag2=lag2(numofinvolves);
    if first._1A then count=1;
    do i=count to dim(x);
        x(i)=.;
    end;
    count + 1;
run;

/* Transition flags between consecutive filings of the same adviser.
   FIX: increase/decrease were written as "then increase=1;increase=0;"
   (no ELSE), which unconditionally reset both flags to 0.
   Note SAS orders a missing value below every number, so a record with a
   missing lag counts as an increase whenever numofinvolves is non-missing. */
data advbase;
    set advbase;
    if numofinvolves>0 and (numofinvolves_lag1=0 or numofinvolves_lag1=.) then first=1; else first=0;
    if numofinvolves=0 and numofinvolves_lag1>0 then yes2no=1; else yes2no=0;
    if numofinvolves>0 and numofinvolves_lag1=0 then no2yes=1; else no2yes=0;
    if numofinvolves>numofinvolves_lag1 then increase=1; else increase=0;
    if numofinvolves<numofinvolves_lag1 then decrease=1; else decrease=0;
    namematch=upcase(substr(_1A,1,20));   /* first 20 characters, upper-cased, as match key */
run;

/* Filing month as yyyymm. */
data advbase2;
    set advbase;
    advdate=year(datesubmitted)*100+month(datesubmitted);
run;

/* Event tables: one per transition flag, each with the filing month as yyyymm. */
data first;
    set advbase;
    where first=1;
    firstdate=year(datesubmitted)*100+month(datesubmitted);
    keep _1A namematch firstdate criminal regulatory civil;
run;
data yes2no;
    set advbase;
    where yes2no=1;
    yes2nodate=year(datesubmitted)*100+month(datesubmitted);
    keep _1A namematch yes2nodate criminal regulatory civil;
run;
data no2yes;
    set advbase;
    where no2yes=1;
    no2yesdate=year(datesubmitted)*100+month(datesubmitted);
    keep _1A namematch no2yesdate criminal regulatory civil;
run;

/* One row per match key, preferring the record with the highest involves. */
proc sort data=advbase out=advcompany noduplicates;
    by namematch descending involves;
run;
proc sort data=advcompany out=advcompany nodupkeys;
    by namematch;
run;
data advcompany;
    set advcompany;
    keep _1A namematch involves numofinvolves;
run;

data hf_performance1;
    set hf_performance1;
    namematch=upcase(substr(companyname,1,20));
run;

/* Funds matched to advisers on the 20-character name key. */
proc sql;
    create table homo_adv as
    select *
    from hf_performance1 as a inner join advcompany as b
    on a.namematch=b.namematch;
quit;
proc means data=homo_adv p90 p95 p99;
    var homo_edu_ratio;
run;

/* Funds matched to every filing of the adviser. */
proc sql;
    create table homo_adv2 as
    select *
    from hf_performance1 as a inner join advbase2 as b
    on a.namematch=b.namematch;
quit;
data homo_adv2;
    set homo_adv2;
    if date=advdate then involves2=1; else involves2=0;
run;

/* NOTE(review): homo_adv2 has one row per (fund, filing); NODUPKEYS keeps
   whichever filing row comes first for each (date, fund_id), so involves2 on
   the retained row may not reflect a matching filing further down. Confirm
   whether max(involves2) per fund/date was intended. */
proc sort data=homo_adv2 out=advcount nodupkeys;
    by date fund_id;
run;
data advcount;
    set advcount;
    count_all=1;
run;
proc sql;
    create table advcount as
    select *, sum(count_all) as totcountall, sum(involves2) as totadv
    from advcount
    group by date;
quit;
proc sort data=advcount nodupkeys;
    by date;
run;
data advcount;
    set advcount;
    ratio=totadv/totcountall;
run;
proc means data=advcount;
    var ratio;
run;


/* Form ADV preparation. It is identical for every homophily measure, so it
   runs once here instead of once per measure. _1A is the adviser key. */
proc import datafile="d:\research\criminal\advbase1.csv" dbms=CSV out=advbase replace;
    getnames=yes;
    guessingrows=50000;
run;
proc sort data=advbase;
    by _1A;
run;

/* Lags of numofinvolves within adviser. LAG() runs over the whole data set,
   so the loop blanks lag k on the first k records of each _1A group to stop
   values leaking across advisers. */
data advbase(drop=i count);
    set advbase;
    by _1A;
    array x(*) numofinvolves_lag1-numofinvolves_lag2;
    numofinvolves_lag1=lag1(numofinvolves);
    numofinvolves_lag2=lag2(numofinvolves);
    if first._1A then count=1;
    do i=count to dim(x);
        x(i)=.;
    end;
    count + 1;
run;

/* Transition flags between consecutive filings of the same adviser.
   FIX: increase/decrease were written as "then increase=1;increase=0;"
   (no ELSE), which unconditionally reset both flags to 0.
   Note SAS orders a missing value below every number, so a record with a
   missing lag counts as an increase whenever numofinvolves is non-missing. */
data advbase;
    set advbase;
    if numofinvolves>0 and (numofinvolves_lag1=0 or numofinvolves_lag1=.) then first=1; else first=0;
    if numofinvolves=0 and numofinvolves_lag1>0 then yes2no=1; else yes2no=0;
    if numofinvolves>0 and numofinvolves_lag1=0 then no2yes=1; else no2yes=0;
    if numofinvolves>numofinvolves_lag1 then increase=1; else increase=0;
    if numofinvolves<numofinvolves_lag1 then decrease=1; else decrease=0;
    namematch=upcase(substr(_1A,1,20));   /* first 20 characters, upper-cased, as match key */
run;

/* Event tables: one per transition flag, each with the filing month as yyyymm. */
data first;
    set advbase;
    where first=1;
    firstdate=year(datesubmitted)*100+month(datesubmitted);
    keep _1A namematch firstdate criminal regulatory civil;
run;
data yes2no;
    set advbase;
    where yes2no=1;
    yes2nodate=year(datesubmitted)*100+month(datesubmitted);
    keep _1A namematch yes2nodate criminal regulatory civil;
run;
data no2yes;
    set advbase;
    where no2yes=1;
    no2yesdate=year(datesubmitted)*100+month(datesubmitted);
    keep _1A namematch no2yesdate criminal regulatory civil;
run;

/* One row per match key, preferring the record with the highest involves. */
proc sort data=advbase out=advcompany noduplicates;
    by namematch descending involves;
run;
proc sort data=advcompany out=advcompany nodupkeys;
    by namematch;
run;
data advcompany;
    set advcompany;
    keep _1A namematch involves numofinvolves;
run;

/*
  %build_homo_adv: match one portfolio file to advcompany.

  suffix=        measure tag; reads hf_performance_portfolio_<suffix>.csv
                 from d:\research\networking.
  describe_var=  variable summarised with PROC MEANS (p90/p95/p99) on the
                 matched sample.

  Work data sets written: hf_performance1 (one record per fund_id, with
  namematch) and homo_adv (funds joined to advcompany on namematch).
*/
%macro build_homo_adv(suffix=, describe_var=);
    proc import datafile="d:\research\networking\hf_performance_portfolio_&suffix..csv" dbms=CSV out=hf_performance1 replace;
        getnames=yes;
        guessingrows=5000;
    run;
    proc sort data=hf_performance1 nodupkeys;
        by fund_id;
    run;
    data hf_performance1;
        set hf_performance1;
        namematch=upcase(substr(companyname,1,20));
    run;

    proc sql;
        create table homo_adv as
        select *
        from hf_performance1 as a inner join advcompany as b
        on a.namematch=b.namematch;
    quit;
    proc means data=homo_adv p90 p95 p99;
        var &describe_var;
    run;
%mend build_homo_adv;

/* %export_homo_adv: write the current homo_adv to homo_adv_<suffix>.csv. */
%macro export_homo_adv(suffix=);
    proc export data=homo_adv
        outfile="d:\research\networking\homo_adv_&suffix..csv"
        dbms=csv
        replace;
    run;
%mend export_homo_adv;

/* origin */
%build_homo_adv(suffix=origin, describe_var=homoorigin_ratio);
/* NOTE(review): these two statements overwrite the outcome variables
   (numofinvolves, involves) from 0 to 1 for funds with homoorigin_ratio>0.95
   and managementfee>2, i.e. the outcome is edited as a function of the
   explanatory variable. No justification is visible in this file. Behaviour
   is preserved here, but this must be confirmed and documented, or removed,
   before the exported file is used for inference. */
data homo_adv;
    set homo_adv;
    if homoorigin_ratio>0.95 and numofinvolves=0 and managementfee>2 then numofinvolves=1;
    if homoorigin_ratio>0.95 and involves=0 and managementfee>2 then involves=1;
run;
%export_homo_adv(suffix=origin);

/* gender and eth.
   FIX: both sections summarised homoorigin_ratio, copied from the origin
   section. They now use the measure of their own file.
   NOTE(review): the names homo_gender_ratio and homoeth_ratio are taken from
   the termination code for the same CSV files; confirm against the headers. */
%build_homo_adv(suffix=gender, describe_var=homo_gender_ratio);
%export_homo_adv(suffix=gender);

%build_homo_adv(suffix=eth, describe_var=homoeth_ratio);
%export_homo_adv(suffix=eth);

/* major */
%build_homo_adv(suffix=major, describe_var=homo_major_ratio);
%export_homo_adv(suffix=major);



/*** omega score ***/
/* TASS performance file; read once, since it is not modified below and every
   measure joins against the same data. */
proc import datafile="d:\research\fancycar\hf_performance1_tass.csv" dbms=CSV out=hf_performance1 replace;
    getnames=yes;
    guessingrows=5000;
run;

/*
  %omega_score: average oscore_s per TASS fund for one portfolio file.

  suffix=  file-name suffix including the leading underscore (blank for the
           baseline file). Reads  hf_performance_portfolio<suffix>.csv and
           writes homo_oscore<suffix>.csv, both under d:\research\networking.

  Work data sets written: tass (portfolio file, one record per fund_id_tass)
  and homo_oscore (one record per fund_id_tass with meanoscore).
  NOTE: the work data set name "tass" is reused from the original script and
  replaces any earlier data set of that name.
*/
%macro omega_score(suffix=);
    proc import datafile="d:\research\networking\hf_performance_portfolio&suffix..csv" dbms=CSV out=tass replace;
        getnames=yes;
        guessingrows=500;
    run;
    proc sort data=tass nodupkey;
        by fund_id_tass;
    run;

    proc sql;
        create table homo_oscore as
        select *
        from hf_performance1 as a inner join tass as b
        on a.fund_id_tass=b.fund_id_tass;

        /* fund-level mean is remerged onto every record of the fund */
        create table homo_oscore as
        select *, mean(oscore_s) as meanoscore
        from homo_oscore
        group by fund_id_tass;
    quit;

    /* collapse to one record per fund */
    proc sort data=homo_oscore nodupkeys;
        by fund_id_tass;
    run;

    proc export data=homo_oscore
        outfile="d:\research\networking\homo_oscore&suffix..csv"
        dbms=csv
        replace;
    run;
%mend omega_score;

%omega_score(suffix=);
%omega_score(suffix=_origin);
%omega_score(suffix=_gender);
%omega_score(suffix=_eth);
%omega_score(suffix=_major);


/*** working experience homo effect ***/

/*** scatter plot for three homo measures ***/
/*** stack them together ***/
/* Education measure: average alpha (in %) per distinct homo_edu_ratio value,
   plotted against diversity = 1 - homo_edu_ratio. */
proc import datafile="d:\research\networking\hf_performance_portfolio.csv" dbms=CSV out=hf_performance replace;
    getnames=yes;
    guessingrows=500;
run;
data hf_performance;
    set hf_performance;
    where totalmanager>=2;
run;
/* %winsor is defined elsewhere in the project; presumably it winsorises alpha
   in place at the 0.2 / 99.8 percentiles -- confirm against the macro. */
%winsor(dsetin=hf_performance, byvar=none, vars=alpha, type=winsor, pctl=0.2 99.8);run;

/* cap the ratio at 1 */
data hf_performance;
    set hf_performance;
    if homo_edu_ratio>1 then homo_edu_ratio=1;
run;
proc sort data=hf_performance;
    by homo_edu_ratio;
run;
/* PROC RANK groups are numbered from 0; homo_edu_group shifts them to 1..100 */
proc rank groups=100 data=hf_performance out=hf_performance;
    var homo_edu_ratio;
    ranks homo_edu_rank;
run;
data hf_performance;
    set hf_performance;
    homo_edu_group=1+homo_edu_rank;
run;

/* mean alpha (x100) per distinct ratio value, then one record per value */
proc sql;
    create table hf_performance as
    select *, mean(alpha*100) as alpha_avg
    from hf_performance
    group by homo_edu_ratio;
quit;
proc sort data=hf_performance nodupkeys;
    by homo_edu_ratio;
run;

/* NOTE(review): alpha_avg is shifted by a hard-coded -0.8 for groups >= 90
   before plotting and export. No justification is visible in this file;
   confirm it is intended and documented, or remove it. */
data hf_performance1;
    set hf_performance;
    if homo_edu_group>=90 then alpha_avg=alpha_avg-0.8;
    diver_edu_ratio=1-homo_edu_ratio;
run;

ods graphics off;
/* FIX: the model was "diver_edu_ratio = alpha_avg", the reverse of the line
   SGPLOT draws (y=alpha_avg, x=diver_edu_ratio), so the inset showed the
   intercept and slope of a different regression. */
proc reg data=hf_performance1;
   model alpha_avg = diver_edu_ratio;
   ods output ParameterEstimates=PE;
run;
/* first row of PE is the intercept, the next the slope; SYMPUTX strips the
   blanks that PUT leaves around the formatted value */
data _null_;
   set PE;
   if _n_ = 1 then call symputx('Int', put(estimate, BEST6.));
   else            call symputx('Slope', put(estimate, BEST6.));
run;
proc sgplot data=hf_performance1 noautolegend;
   reg y=alpha_avg x=diver_edu_ratio;
   inset "Intercept = &Int" "Slope = &Slope" /
  border  title="Parameter Estimates" position=topleft;
  xaxis label="team diversity based on education background";
  yaxis label="fund abnormal return(%)";
run;

/* labelled copy for the stacked panel plot */
data hf_performance_edu;
    set hf_performance1;
    class1="fund diversity based on education background";
    rename diver_edu_ratio=diver_ratio;
run;
/* Work-experience measure: average alpha (in %) per distinct homo_work_ratio
   value, plotted against diversity = 1 - homo_work_ratio.
   NOTE(review): unlike the education and nationality sections, no
   totalmanager>=2 filter is applied here; confirm whether that is intended. */
proc import datafile="d:\research\networking\hf_performance_portfolio.csv" dbms=CSV out=hf_performance replace;
    getnames=yes;
    guessingrows=500;
run;
/* %winsor is defined elsewhere in the project; presumably it winsorises alpha
   in place at the 0.2 / 99.8 percentiles -- confirm against the macro. */
%winsor(dsetin=hf_performance, byvar=none, vars=alpha, type=winsor, pctl=0.2 99.8);run;

/* cap the ratio at 1 */
data hf_performance;
    set hf_performance;
    if homo_work_ratio>1 then homo_work_ratio=1;
run;
proc sort data=hf_performance;
    by homo_work_ratio;
run;
/* PROC RANK groups are numbered from 0; homo_work_group shifts them to 1..100 */
proc rank groups=100 data=hf_performance out=hf_performance;
    var homo_work_ratio;
    ranks homo_work_rank;
run;
data hf_performance;
    set hf_performance;
    homo_work_group=1+homo_work_rank;
run;

/* mean alpha (x100) per distinct ratio value, then one record per value */
proc sql;
    create table hf_performance as
    select *, mean(alpha*100) as alpha_avg
    from hf_performance
    group by homo_work_ratio;
quit;
proc sort data=hf_performance nodupkeys;
    by homo_work_ratio;
run;

/* NOTE(review): alpha_avg is shifted by hard-coded constants (-1 for groups
   >= 90, +1 for groups <= 50) before plotting and export. No justification is
   visible in this file; confirm it is intended and documented, or remove it. */
data hf_performance1;
    set hf_performance;
    if homo_work_group>=90 then alpha_avg=alpha_avg-1;
    if homo_work_group<=50 then alpha_avg=alpha_avg+1;
    diver_work_ratio=1-homo_work_ratio;
run;

ods graphics off;
/* FIX: the model was "diver_work_ratio = alpha_avg", the reverse of the line
   SGPLOT draws (y=alpha_avg, x=diver_work_ratio), so the inset showed the
   intercept and slope of a different regression. */
proc reg data=hf_performance1;
   model alpha_avg = diver_work_ratio;
   ods output ParameterEstimates=PE;
run;
/* first row of PE is the intercept, the next the slope; SYMPUTX strips the
   blanks that PUT leaves around the formatted value */
data _null_;
   set PE;
   if _n_ = 1 then call symputx('Int', put(estimate, BEST6.));
   else            call symputx('Slope', put(estimate, BEST6.));
run;
proc sgplot data=hf_performance1 noautolegend;
   reg y=alpha_avg x=diver_work_ratio;
   inset "Intercept = &Int" "Slope = &Slope" /
  border /*** title="Parameter Estimates"***/ position=topleft;
  xaxis label="team diversity based on work experience";
  yaxis label="fund abnormal return(%)";
run;

/* labelled copy for the stacked panel plot */
data hf_performance_work;
    set hf_performance1;
    class1="fund diversity based on work experience";
    rename diver_work_ratio=diver_ratio;
run;



/* Nationality measure: average alpha (in %) per distinct homoorigin_ratio
   value, plotted against diversity = 1 - homoorigin_ratio. */
proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv" dbms=CSV out=hf_performance replace;
    getnames=yes;
    guessingrows=500;
run;
data hf_performance;
    set hf_performance;
    where totalmanager>=2;
run;
/* %winsor is defined elsewhere in the project; presumably it winsorises alpha
   in place at the 0.2 / 99.8 percentiles -- confirm against the macro. */
%winsor(dsetin=hf_performance, byvar=none, vars=alpha, type=winsor, pctl=0.2 99.8);run;

/* cap the ratio at 1 */
data hf_performance;
    set hf_performance;
    if homoorigin_ratio>1 then homoorigin_ratio=1;
run;
proc sort data=hf_performance;
    by homoorigin_ratio;
run;
/* PROC RANK groups are numbered from 0; homo_origin_group shifts them to 1..100 */
proc rank groups=100 data=hf_performance out=hf_performance;
    var homoorigin_ratio;
    ranks homo_origin_rank;
run;
data hf_performance;
    set hf_performance;
    homo_origin_group=1+homo_origin_rank;
run;

/* mean alpha (x100) per distinct ratio value, then one record per value */
proc sql;
    create table hf_performance as
    select *, mean(alpha*100) as alpha_avg
    from hf_performance
    group by homoorigin_ratio;
quit;
proc sort data=hf_performance nodupkeys;
    by homoorigin_ratio;
run;

data hf_performance1;
    set hf_performance;
    diver_origin_ratio=1-homoorigin_ratio;
run;

ods graphics off;
/* FIX: the model was "diver_origin_ratio = alpha_avg", the reverse of the
   line SGPLOT draws (y=alpha_avg, x=diver_origin_ratio), so the inset showed
   the intercept and slope of a different regression. */
proc reg data=hf_performance1;
   model alpha_avg = diver_origin_ratio;
   ods output ParameterEstimates=PE;
run;
/* first row of PE is the intercept, the next the slope; SYMPUTX strips the
   blanks that PUT leaves around the formatted value */
data _null_;
   set PE;
   if _n_ = 1 then call symputx('Int', put(estimate, BEST6.));
   else            call symputx('Slope', put(estimate, BEST6.));
run;
proc sgplot data=hf_performance1 noautolegend;
    /*** title "Line of Best Fit"; ***/
   reg y=alpha_avg x=diver_origin_ratio;
   inset "Intercept = &Int" "Slope = &Slope" /
  border /*** title="Parameter Estimates"***/ position=topleft;
  xaxis label="diver_origin_ratio";
  yaxis label="fund monthly alpha(%)";
run;

/* labelled copy for the stacked panel plot */
data hf_performance_origin;
    set hf_performance1;
    class1="fund diversity based on nationality";
    rename diver_origin_ratio=diver_ratio;
run;
/* Stack the three labelled measure tables (education first, so class1 takes
   its length from the longest label) and keep only the plotting columns. */
data hf_performance_combined;
    set hf_performance_edu
        hf_performance_origin
        hf_performance_work;
    keep class1 diver_ratio alpha_avg;
run;

proc export data=hf_performance_combined
    outfile='d:\research\networking\plot_edu_work_origin.csv'
    dbms=csv
    replace;
run;

/* One regression panel per measure, stacked in a single column. */
proc sgpanel data=hf_performance_combined;
    panelby class1 / novarname columns=1 colheaderpos=bottom;
    reg x=diver_ratio y=alpha_avg;
    rowaxis label="Fund Abnormal Return (%)";
    colaxis label="Team Diversity";
    title ;
run;

/*** performance persistence analysis ***/
/*** edu ***/
proc import datafile="d:\research\networking\hf_performance_portfolio.csv" dbms=CSV out=hf_performance_portfolio replace;
getnames=yes;
guessingrows=50000;
run;
proc sort data=hf_performance_portfolio;
by year month;
run;
data hf_performance_portfolio;
set hf_performance_portfolio;
drop meanreturn meanalpha meanflow;
run;

data hf_performance_portfolio_low;
set hf_performance_portfolio;
where homo_edu_ratio=0;
run;
proc means median data=hf_performance_portfolio;
var homo_edu_ratio;
where homo_edu_ratio>0;
run;
/*** edu: 0.2941 ***/

proc means median data=hf_performance_portfolio;
var homo_work_ratio;
where homo_work_ratio>0;
run;
/*** work: 0.4 ***/
data hf_performance_portfolio_high;
set hf_performance_portfolio;
where homo_edu_ratio>0.2941;
run;
data hf_performance_portfolio_med;
set hf_performance_portfolio;
where homo_edu_ratio>0 and homo_edu_ratio<=0.2941;
run;

proc rank groups=5 data=hf_performance_portfolio_low out=hf_performance_portfolio_low1; 
var lag24alpha;
ranks lag24alpha_rank;
run;

data hf_performance_portfolio_low1;
set hf_performance_portfolio_low1;
lag24alpha_rank1=5-lag24alpha_rank;
where lag24alpha_rank not=.;
run;

data hf_performance_portfolio_low1;
set hf_performance_portfolio_low1;;
where strategy not="Fund of Funds";
run;
proc sort;
by lag24alpha_rank1;
run;
proc means data=hf_performance_portfolio_low1;
var lead12alpha;
by lag24alpha_rank1;
run;

proc ttest;
var lead12alpha;
by lag24alpha_rank1;
run;

proc ttest;
var lead12alpha;
class lag24alpha_rank1;
where lag24alpha_rank1=1 or lag24alpha_rank1=5;
run;


/*** edu, med group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_med groups=5
          out=hf_performance_portfolio_med1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_med1;
    set hf_performance_portfolio_med1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* One-sample t test (default H0: mean = 0) of lead12alpha per quintile. */
proc ttest data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_med1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** edu, high group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_high groups=5
          out=hf_performance_portfolio_high1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_high1;
    set hf_performance_portfolio_high1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_high1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_high1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5.
   (Unlike the low/med groups, no per-quintile one-sample test is run here.) */
proc ttest data=hf_performance_portfolio_high1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** work: split the panel by homo_work_ratio ***/
/* low  : homo_work_ratio = 0
   high : homo_work_ratio > 0.4
   med  : 0 < homo_work_ratio <= 0.4
   0.4 is the hard-coded median of the positive work ratios (work: 0.4).
   Fix: the med group previously tested homo_edu_ratio <= 0.4 for its upper
   bound, which mixed the education ratio into the work split. It now uses
   homo_work_ratio on both sides. */
data hf_performance_portfolio_low;
    set hf_performance_portfolio;
    where homo_work_ratio = 0;
run;

data hf_performance_portfolio_high;
    set hf_performance_portfolio;
    where homo_work_ratio > 0.4;
run;

data hf_performance_portfolio_med;
    set hf_performance_portfolio;
    where homo_work_ratio > 0 and homo_work_ratio <= 0.4;
run;

/*** work, low group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_low groups=5
          out=hf_performance_portfolio_low1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_low1;
    set hf_performance_portfolio_low1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* One-sample t test (default H0: mean = 0) of lead12alpha per quintile. */
proc ttest data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_low1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** work, med group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_med groups=5
          out=hf_performance_portfolio_med1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_med1;
    set hf_performance_portfolio_med1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* One-sample t test (default H0: mean = 0) of lead12alpha per quintile. */
proc ttest data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_med1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** work, high group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_high groups=5
          out=hf_performance_portfolio_high1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_high1;
    set hf_performance_portfolio_high1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_high1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_high1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5.
   (Unlike the low/med groups, no per-quintile one-sample test is run here.) */
proc ttest data=hf_performance_portfolio_high1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;

/*** origin: load the origin panel and split it by homoorigin_ratio ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv"
            dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

proc sort data=hf_performance_portfolio;
    by year month;
run;

/* One pass: rewrite the panel without meanreturn/meanalpha/meanflow and
   route each row to its homoorigin_ratio group.
   0.5 is the hard-coded median of the positive ratios as printed by the
   PROC MEANS below (origin: 0.5); rows with a missing ratio match no group.
   NOTE(review): no minimum-manager filter is applied in this section, unlike
   the gender/eth sections - confirm that is intended. */
data hf_performance_portfolio
     hf_performance_portfolio_low
     hf_performance_portfolio_high
     hf_performance_portfolio_med;
    set hf_performance_portfolio;
    drop meanreturn meanalpha meanflow;
    output hf_performance_portfolio;
    if homoorigin_ratio = 0 then output hf_performance_portfolio_low;
    else if homoorigin_ratio > 0.5 then output hf_performance_portfolio_high;
    else if homoorigin_ratio > 0 then output hf_performance_portfolio_med;
run;

/* Median of the strictly positive origin ratios (source of 0.5). */
proc means data=hf_performance_portfolio median;
    where homoorigin_ratio > 0;
    var homoorigin_ratio;
run;
/*** origin: 0.5 ***/

/*** origin, low group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_low groups=5
          out=hf_performance_portfolio_low1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_low1;
    set hf_performance_portfolio_low1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* One-sample t test (default H0: mean = 0) of lead12alpha per quintile. */
proc ttest data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_low1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** origin, med group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_med groups=5
          out=hf_performance_portfolio_med1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_med1;
    set hf_performance_portfolio_med1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* One-sample t test (default H0: mean = 0) of lead12alpha per quintile. */
proc ttest data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_med1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** origin, high group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_high groups=5
          out=hf_performance_portfolio_high1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_high1;
    set hf_performance_portfolio_high1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_high1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_high1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5.
   (Unlike the low/med groups, no per-quintile one-sample test is run here.) */
proc ttest data=hf_performance_portfolio_high1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** gender: load the gender panel and split it by homo_gender_ratio ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_gender.csv"
            dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

/* Keep rows with totalcount >= 2 and drop meanreturn/meanalpha/meanflow. */
data hf_performance_portfolio;
    set hf_performance_portfolio(where=(totalcount >= 2));
    drop meanreturn meanalpha meanflow;
run;

proc sort data=hf_performance_portfolio;
    by year month;
run;

/* low  : homo_gender_ratio < 0.67
   high : homo_gender_ratio = 1
   med  : 0.67 < homo_gender_ratio < 1
   NOTE(review): a ratio of exactly 0.67 lands in no group, and missing
   ratios land in low because SAS orders missing below every number -
   confirm both are intended. */
data hf_performance_portfolio_low
     hf_performance_portfolio_high
     hf_performance_portfolio_med;
    set hf_performance_portfolio;
    if homo_gender_ratio < 0.67 then output hf_performance_portfolio_low;
    else if homo_gender_ratio = 1 then output hf_performance_portfolio_high;
    else if homo_gender_ratio > 0.67 and homo_gender_ratio < 1
        then output hf_performance_portfolio_med;
run;

/*** gender, low group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_low groups=5
          out=hf_performance_portfolio_low1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_low1;
    set hf_performance_portfolio_low1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* One-sample t test (default H0: mean = 0) of lead12alpha per quintile. */
proc ttest data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_low1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** gender, med group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_med groups=5
          out=hf_performance_portfolio_med1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_med1;
    set hf_performance_portfolio_med1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* One-sample t test (default H0: mean = 0) of lead12alpha per quintile. */
proc ttest data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_med1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** gender, high group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_high groups=5
          out=hf_performance_portfolio_high1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_high1;
    set hf_performance_portfolio_high1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_high1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_high1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5.
   (Unlike the low/med groups, no per-quintile one-sample test is run here.) */
proc ttest data=hf_performance_portfolio_high1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** eth: load the ethnicity panel and split it by homoeth_ratio ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_eth.csv"
            dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

/* Keep rows with totalmanager >= 2 and drop meanreturn/meanalpha/meanflow. */
data hf_performance_portfolio;
    set hf_performance_portfolio(where=(totalmanager >= 2));
    drop meanreturn meanalpha meanflow;
run;

proc sort data=hf_performance_portfolio;
    by year month;
run;

/* low  : homoeth_ratio = 0
   high : homoeth_ratio = 1
   med  : 0 < homoeth_ratio < 1
   Rows with a missing ratio match no group. */
data hf_performance_portfolio_low
     hf_performance_portfolio_high
     hf_performance_portfolio_med;
    set hf_performance_portfolio;
    if homoeth_ratio = 0 then output hf_performance_portfolio_low;
    else if homoeth_ratio = 1 then output hf_performance_portfolio_high;
    else if homoeth_ratio > 0 and homoeth_ratio < 1
        then output hf_performance_portfolio_med;
run;

/*** eth, low group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_low groups=5
          out=hf_performance_portfolio_low1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_low1;
    set hf_performance_portfolio_low1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* One-sample t test (default H0: mean = 0) of lead12alpha per quintile. */
proc ttest data=hf_performance_portfolio_low1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_low1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** eth, med group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_med groups=5
          out=hf_performance_portfolio_med1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_med1;
    set hf_performance_portfolio_med1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* One-sample t test (default H0: mean = 0) of lead12alpha per quintile. */
proc ttest data=hf_performance_portfolio_med1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_med1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;


/*** eth, high group: quintile sort on lag24alpha, compare lead12alpha ***/
proc rank data=hf_performance_portfolio_high groups=5
          out=hf_performance_portfolio_high1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1 so quintile 1 holds the largest lag24alpha.
   Unranked rows and funds of funds are removed in the same pass. */
data hf_performance_portfolio_high1;
    set hf_performance_portfolio_high1(
        where=(lag24alpha_rank ne . and strategy ne "Fund of Funds"));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
run;

proc sort data=hf_performance_portfolio_high1;
    by lag24alpha_rank1;
run;

/* Descriptive statistics of lead12alpha within each quintile. */
proc means data=hf_performance_portfolio_high1;
    by lag24alpha_rank1;
    var lead12alpha;
run;

/* Two-sample t test of lead12alpha between quintile 1 and quintile 5.
   (Unlike the low/med groups, no per-quintile one-sample test is run here.) */
proc ttest data=hf_performance_portfolio_high1;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var lead12alpha;
run;

/*** normal ranking progress ***/
/*** edu: reload the panel, filter it, split it by homo_edu_ratio ***/
proc import datafile="d:\research\networking\hf_performance_portfolio.csv"
            dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

/* Keep rows with totalmanager >= 2 and year after 1996; drop
   meanreturn/meanalpha/meanflow. */
data hf_performance_portfolio;
    set hf_performance_portfolio(where=(totalmanager >= 2 and year > 1996));
    drop meanreturn meanalpha meanflow;
run;

proc sort data=hf_performance_portfolio;
    by year month;
run;

/* low  : homo_edu_ratio = 0
   high : homo_edu_ratio > 0.2941
   med  : 0 < homo_edu_ratio <= 0.2941
   0.2941 is hard-coded from the recorded median (edu: 0.2941); rows with a
   missing ratio match no group. */
data hf_performance_portfolio_low
     hf_performance_portfolio_high
     hf_performance_portfolio_med;
    set hf_performance_portfolio;
    if homo_edu_ratio = 0 then output hf_performance_portfolio_low;
    else if homo_edu_ratio > 0.2941 then output hf_performance_portfolio_high;
    else if homo_edu_ratio > 0 then output hf_performance_portfolio_med;
run;

/* Median of the strictly positive education ratios on the filtered panel. */
proc means data=hf_performance_portfolio median;
    where homo_edu_ratio > 0;
    var homo_edu_ratio;
run;
/*** edu: 0.2941 ***/

/*** edu, low group: January-ranked quintile portfolios and excess returns ***/

/* Rank funds into quintiles on lag24alpha using January rows only.
   No BY statement is used, so the breakpoints pool all years. */
proc rank data=hf_performance_portfolio_low groups=5
          out=hf_performance_portfolio_low1;
    where month = 1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1. Only the merge keys and the quintile are kept. */
data hf_performance_portfolio_low1;
    set hf_performance_portfolio_low1(where=(lag24alpha_rank ne .));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
    keep fund_id year lag24alpha_rank1;
run;

proc sql;
    /* Spread each fund's January quintile over every month of that year.
       Funds without a January rank keep a missing quintile (left join). */
    create table hf_performance_portfolio_low as
    select a.*, b.lag24alpha_rank1
    from hf_performance_portfolio_low as a
    left join hf_performance_portfolio_low1 as b
        on a.fund_id = b.fund_id and a.year = b.year;

    /* Mean return per quintile-year-month, remerged onto every row. */
    create table hf_performance_portfolio_low as
    select *, mean(return) as meanreturn
    from hf_performance_portfolio_low
    group by lag24alpha_rank1, year, month;
quit;

/* Collapse to one row per quintile-year-month. The data set is named
   explicitly instead of relying on _LAST_. */
proc sort data=hf_performance_portfolio_low nodupkey;
    by lag24alpha_rank1 year month;
run;

/* Portfolio series: drop the factor columns carried in the fund file
   (presumably so the factor file's versions are used) and build the
   yyyymm join key. */
data hf_performance_portfolio_low10;
    set hf_performance_portfolio_low;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    date = year*100 + month;
run;

proc import datafile="d:\research\testosterone\7factors.csv"
            dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/* Attach the monthly factors by yyyymm. */
proc sql;
    create table hf_performance_portfolio_low10 as
    select *
    from hf_performance_portfolio_low10 as p
    inner join sevenfactors as f
        on p.date = f.yyyymm;
quit;

data hf_performance_portfolio_low10;
    set hf_performance_portfolio_low10;
    excessreturn = meanreturn - rf;
run;

/* Fix: sort BEFORE the BY-group procedures. The SQL join above has no
   ORDER BY, so its row order is not guaranteed, and BY processing stops
   with an error on unsorted input. The sort used to run only after the
   tests. */
proc sort data=hf_performance_portfolio_low10;
    by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_low10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* One-sample t test (default H0: mean = 0) of excess return per quintile. */
proc ttest data=hf_performance_portfolio_low10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* Two-sample t test of excess return between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_low10;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var excessreturn;
run;

proc export data=hf_performance_portfolio_low10
    outfile='d:\research\networking\hf_performance_portfolio_edulow10.csv'
    dbms=csv
    replace;
run;


/*** edu, med group: January-ranked quintile portfolios and excess returns ***/

/* Rank funds into quintiles on lag24alpha using January rows only.
   No BY statement is used, so the breakpoints pool all years. */
proc rank data=hf_performance_portfolio_med groups=5
          out=hf_performance_portfolio_med1;
    where month = 1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1. Only the merge keys and the quintile are kept. */
data hf_performance_portfolio_med1;
    set hf_performance_portfolio_med1(where=(lag24alpha_rank ne .));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
    keep fund_id year lag24alpha_rank1;
run;

proc sql;
    /* Spread each fund's January quintile over every month of that year.
       Funds without a January rank keep a missing quintile (left join). */
    create table hf_performance_portfolio_med as
    select a.*, b.lag24alpha_rank1
    from hf_performance_portfolio_med as a
    left join hf_performance_portfolio_med1 as b
        on a.fund_id = b.fund_id and a.year = b.year;

    /* Mean return per quintile-year-month, remerged onto every row. */
    create table hf_performance_portfolio_med as
    select *, mean(return) as meanreturn
    from hf_performance_portfolio_med
    group by lag24alpha_rank1, year, month;
quit;

/* Collapse to one row per quintile-year-month. The data set is named
   explicitly instead of relying on _LAST_. */
proc sort data=hf_performance_portfolio_med nodupkey;
    by lag24alpha_rank1 year month;
run;

/* Portfolio series: drop the factor columns carried in the fund file
   (presumably so the factor file's versions are used) and build the
   yyyymm join key. */
data hf_performance_portfolio_med10;
    set hf_performance_portfolio_med;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    date = year*100 + month;
run;

proc import datafile="d:\research\testosterone\7factors.csv"
            dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/* Attach the monthly factors by yyyymm. */
proc sql;
    create table hf_performance_portfolio_med10 as
    select *
    from hf_performance_portfolio_med10 as p
    inner join sevenfactors as f
        on p.date = f.yyyymm;
quit;

data hf_performance_portfolio_med10;
    set hf_performance_portfolio_med10;
    excessreturn = meanreturn - rf;
run;

/* Fix: sort BEFORE the BY-group procedures. The SQL join above has no
   ORDER BY, so its row order is not guaranteed, and BY processing stops
   with an error on unsorted input. The sort used to run only after the
   tests. */
proc sort data=hf_performance_portfolio_med10;
    by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_med10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* One-sample t test (default H0: mean = 0) of excess return per quintile. */
proc ttest data=hf_performance_portfolio_med10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* Two-sample t test of excess return between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_med10;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var excessreturn;
run;

proc export data=hf_performance_portfolio_med10
    outfile='d:\research\networking\hf_performance_portfolio_edumed10.csv'
    dbms=csv
    replace;
run;

/*** edu, high group: January-ranked quintile portfolios and excess returns ***/

/* Rank funds into quintiles on lag24alpha using January rows only.
   No BY statement is used, so the breakpoints pool all years. */
proc rank data=hf_performance_portfolio_high groups=5
          out=hf_performance_portfolio_high1;
    where month = 1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1. Only the merge keys and the quintile are kept. */
data hf_performance_portfolio_high1;
    set hf_performance_portfolio_high1(where=(lag24alpha_rank ne .));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
    keep fund_id year lag24alpha_rank1;
run;

proc sql;
    /* Spread each fund's January quintile over every month of that year.
       Funds without a January rank keep a missing quintile (left join). */
    create table hf_performance_portfolio_high as
    select a.*, b.lag24alpha_rank1
    from hf_performance_portfolio_high as a
    left join hf_performance_portfolio_high1 as b
        on a.fund_id = b.fund_id and a.year = b.year;

    /* Mean return per quintile-year-month, remerged onto every row. */
    create table hf_performance_portfolio_high as
    select *, mean(return) as meanreturn
    from hf_performance_portfolio_high
    group by lag24alpha_rank1, year, month;
quit;

/* Collapse to one row per quintile-year-month. The data set is named
   explicitly instead of relying on _LAST_. */
proc sort data=hf_performance_portfolio_high nodupkey;
    by lag24alpha_rank1 year month;
run;

/* Portfolio series: drop the factor columns carried in the fund file
   (presumably so the factor file's versions are used) and build the
   yyyymm join key. */
data hf_performance_portfolio_high10;
    set hf_performance_portfolio_high;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    date = year*100 + month;
run;

proc import datafile="d:\research\testosterone\7factors.csv"
            dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/* Attach the monthly factors by yyyymm. */
proc sql;
    create table hf_performance_portfolio_high10 as
    select *
    from hf_performance_portfolio_high10 as p
    inner join sevenfactors as f
        on p.date = f.yyyymm;
quit;

data hf_performance_portfolio_high10;
    set hf_performance_portfolio_high10;
    excessreturn = meanreturn - rf;
run;

/* Fix: sort BEFORE the BY-group procedures. The SQL join above has no
   ORDER BY, so its row order is not guaranteed, and BY processing stops
   with an error on unsorted input. The sort used to run only after the
   tests. */
proc sort data=hf_performance_portfolio_high10;
    by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_high10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* One-sample t test (default H0: mean = 0) of excess return per quintile. */
proc ttest data=hf_performance_portfolio_high10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* Two-sample t test of excess return between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_high10;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var excessreturn;
run;

proc export data=hf_performance_portfolio_high10
    outfile='d:\research\networking\hf_performance_portfolio_eduhigh10.csv'
    dbms=csv
    replace;
run;

/*** work: reload the panel, filter it, split it by homo_work_ratio ***/
proc import datafile="d:\research\networking\hf_performance_portfolio.csv"
            dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

/* Keep rows with totalmanager >= 2 and year after 1996; drop
   meanreturn/meanalpha/meanflow. */
data hf_performance_portfolio;
    set hf_performance_portfolio(where=(totalmanager >= 2 and year > 1996));
    drop meanreturn meanalpha meanflow;
run;

proc sort data=hf_performance_portfolio;
    by year month;
run;

/* low  : homo_work_ratio = 0
   high : homo_work_ratio > 0.4
   med  : 0 < homo_work_ratio <= 0.4
   0.4 is hard-coded from the recorded median (work: 0.4); rows with a
   missing ratio match no group. */
data hf_performance_portfolio_low
     hf_performance_portfolio_high
     hf_performance_portfolio_med;
    set hf_performance_portfolio;
    if homo_work_ratio = 0 then output hf_performance_portfolio_low;
    else if homo_work_ratio > 0.4 then output hf_performance_portfolio_high;
    else if homo_work_ratio > 0 then output hf_performance_portfolio_med;
run;

/* Median of the strictly positive work ratios on the filtered panel. */
proc means data=hf_performance_portfolio median;
    where homo_work_ratio > 0;
    var homo_work_ratio;
run;
/*** work: 0.4 ***/

/*** work, low group: January-ranked quintile portfolios and excess returns ***/

/* Rank funds into quintiles on lag24alpha using January rows only.
   No BY statement is used, so the breakpoints pool all years. */
proc rank data=hf_performance_portfolio_low groups=5
          out=hf_performance_portfolio_low1;
    where month = 1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1. Only the merge keys and the quintile are kept. */
data hf_performance_portfolio_low1;
    set hf_performance_portfolio_low1(where=(lag24alpha_rank ne .));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
    keep fund_id year lag24alpha_rank1;
run;

proc sql;
    /* Spread each fund's January quintile over every month of that year.
       Funds without a January rank keep a missing quintile (left join). */
    create table hf_performance_portfolio_low as
    select a.*, b.lag24alpha_rank1
    from hf_performance_portfolio_low as a
    left join hf_performance_portfolio_low1 as b
        on a.fund_id = b.fund_id and a.year = b.year;

    /* Mean return per quintile-year-month, remerged onto every row. */
    create table hf_performance_portfolio_low as
    select *, mean(return) as meanreturn
    from hf_performance_portfolio_low
    group by lag24alpha_rank1, year, month;
quit;

/* Collapse to one row per quintile-year-month. The data set is named
   explicitly instead of relying on _LAST_. */
proc sort data=hf_performance_portfolio_low nodupkey;
    by lag24alpha_rank1 year month;
run;

/* Portfolio series: drop the factor columns carried in the fund file
   (presumably so the factor file's versions are used) and build the
   yyyymm join key. */
data hf_performance_portfolio_low10;
    set hf_performance_portfolio_low;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    date = year*100 + month;
run;

proc import datafile="d:\research\testosterone\7factors.csv"
            dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/* Attach the monthly factors by yyyymm. */
proc sql;
    create table hf_performance_portfolio_low10 as
    select *
    from hf_performance_portfolio_low10 as p
    inner join sevenfactors as f
        on p.date = f.yyyymm;
quit;

data hf_performance_portfolio_low10;
    set hf_performance_portfolio_low10;
    excessreturn = meanreturn - rf;
run;

/* Fix: sort BEFORE the BY-group procedures. The SQL join above has no
   ORDER BY, so its row order is not guaranteed, and BY processing stops
   with an error on unsorted input. The sort used to run only after the
   tests. */
proc sort data=hf_performance_portfolio_low10;
    by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_low10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* One-sample t test (default H0: mean = 0) of excess return per quintile. */
proc ttest data=hf_performance_portfolio_low10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* Two-sample t test of excess return between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_low10;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var excessreturn;
run;

proc export data=hf_performance_portfolio_low10
    outfile='d:\research\networking\hf_performance_portfolio_worklow10.csv'
    dbms=csv
    replace;
run;


/*** work, med group: January-ranked quintile portfolios and excess returns ***/

/* Rank funds into quintiles on lag24alpha using January rows only.
   No BY statement is used, so the breakpoints pool all years. */
proc rank data=hf_performance_portfolio_med groups=5
          out=hf_performance_portfolio_med1;
    where month = 1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1. Only the merge keys and the quintile are kept. */
data hf_performance_portfolio_med1;
    set hf_performance_portfolio_med1(where=(lag24alpha_rank ne .));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
    keep fund_id year lag24alpha_rank1;
run;

proc sql;
    /* Spread each fund's January quintile over every month of that year.
       Funds without a January rank keep a missing quintile (left join). */
    create table hf_performance_portfolio_med as
    select a.*, b.lag24alpha_rank1
    from hf_performance_portfolio_med as a
    left join hf_performance_portfolio_med1 as b
        on a.fund_id = b.fund_id and a.year = b.year;

    /* Mean return per quintile-year-month, remerged onto every row. */
    create table hf_performance_portfolio_med as
    select *, mean(return) as meanreturn
    from hf_performance_portfolio_med
    group by lag24alpha_rank1, year, month;
quit;

/* Collapse to one row per quintile-year-month. The data set is named
   explicitly instead of relying on _LAST_. */
proc sort data=hf_performance_portfolio_med nodupkey;
    by lag24alpha_rank1 year month;
run;

/* Portfolio series: drop the factor columns carried in the fund file
   (presumably so the factor file's versions are used) and build the
   yyyymm join key. */
data hf_performance_portfolio_med10;
    set hf_performance_portfolio_med;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    date = year*100 + month;
run;

proc import datafile="d:\research\testosterone\7factors.csv"
            dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/* Attach the monthly factors by yyyymm. */
proc sql;
    create table hf_performance_portfolio_med10 as
    select *
    from hf_performance_portfolio_med10 as p
    inner join sevenfactors as f
        on p.date = f.yyyymm;
quit;

data hf_performance_portfolio_med10;
    set hf_performance_portfolio_med10;
    excessreturn = meanreturn - rf;
run;

/* Fix: sort BEFORE the BY-group procedures. The SQL join above has no
   ORDER BY, so its row order is not guaranteed, and BY processing stops
   with an error on unsorted input. The sort used to run only after the
   tests. */
proc sort data=hf_performance_portfolio_med10;
    by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_med10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* One-sample t test (default H0: mean = 0) of excess return per quintile. */
proc ttest data=hf_performance_portfolio_med10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* Two-sample t test of excess return between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_med10;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var excessreturn;
run;

proc export data=hf_performance_portfolio_med10
    outfile='d:\research\networking\hf_performance_portfolio_workmed10.csv'
    dbms=csv
    replace;
run;

/*** work, high group: January-ranked quintile portfolios and excess returns ***/

/* Rank funds into quintiles on lag24alpha using January rows only.
   No BY statement is used, so the breakpoints pool all years. */
proc rank data=hf_performance_portfolio_high groups=5
          out=hf_performance_portfolio_high1;
    where month = 1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1. Only the merge keys and the quintile are kept. */
data hf_performance_portfolio_high1;
    set hf_performance_portfolio_high1(where=(lag24alpha_rank ne .));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
    keep fund_id year lag24alpha_rank1;
run;

proc sql;
    /* Spread each fund's January quintile over every month of that year.
       Funds without a January rank keep a missing quintile (left join). */
    create table hf_performance_portfolio_high as
    select a.*, b.lag24alpha_rank1
    from hf_performance_portfolio_high as a
    left join hf_performance_portfolio_high1 as b
        on a.fund_id = b.fund_id and a.year = b.year;

    /* Mean return per quintile-year-month, remerged onto every row. */
    create table hf_performance_portfolio_high as
    select *, mean(return) as meanreturn
    from hf_performance_portfolio_high
    group by lag24alpha_rank1, year, month;
quit;

/* Collapse to one row per quintile-year-month. The data set is named
   explicitly instead of relying on _LAST_. */
proc sort data=hf_performance_portfolio_high nodupkey;
    by lag24alpha_rank1 year month;
run;

/* Portfolio series: drop the factor columns carried in the fund file
   (presumably so the factor file's versions are used) and build the
   yyyymm join key. */
data hf_performance_portfolio_high10;
    set hf_performance_portfolio_high;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    date = year*100 + month;
run;

proc import datafile="d:\research\testosterone\7factors.csv"
            dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/* Attach the monthly factors by yyyymm. */
proc sql;
    create table hf_performance_portfolio_high10 as
    select *
    from hf_performance_portfolio_high10 as p
    inner join sevenfactors as f
        on p.date = f.yyyymm;
quit;

data hf_performance_portfolio_high10;
    set hf_performance_portfolio_high10;
    excessreturn = meanreturn - rf;
run;

/* Fix: sort BEFORE the BY-group procedures. The SQL join above has no
   ORDER BY, so its row order is not guaranteed, and BY processing stops
   with an error on unsorted input. The sort used to run only after the
   tests. */
proc sort data=hf_performance_portfolio_high10;
    by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_high10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* One-sample t test (default H0: mean = 0) of excess return per quintile. */
proc ttest data=hf_performance_portfolio_high10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* Two-sample t test of excess return between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_high10;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var excessreturn;
run;

proc export data=hf_performance_portfolio_high10
    outfile='d:\research\networking\hf_performance_portfolio_workhigh10.csv'
    dbms=csv
    replace;
run;


/*** origin: reload the origin panel, filter it, split it by homoorigin_ratio ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv"
            dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

/* Keep rows with totalmanager >= 2 and year after 1996; drop
   meanreturn/meanalpha/meanflow. */
data hf_performance_portfolio;
    set hf_performance_portfolio(where=(totalmanager >= 2 and year > 1996));
    drop meanreturn meanalpha meanflow;
run;

proc sort data=hf_performance_portfolio;
    by year month;
run;

/* low  : homoorigin_ratio = 0
   high : homoorigin_ratio > 0.5
   med  : 0 < homoorigin_ratio <= 0.5
   0.5 is hard-coded from the recorded median (origin: 0.5); rows with a
   missing ratio match no group. */
data hf_performance_portfolio_low
     hf_performance_portfolio_high
     hf_performance_portfolio_med;
    set hf_performance_portfolio;
    if homoorigin_ratio = 0 then output hf_performance_portfolio_low;
    else if homoorigin_ratio > 0.5 then output hf_performance_portfolio_high;
    else if homoorigin_ratio > 0 then output hf_performance_portfolio_med;
run;

/* Median of the strictly positive origin ratios on the filtered panel. */
proc means data=hf_performance_portfolio median;
    where homoorigin_ratio > 0;
    var homoorigin_ratio;
run;
/*** origin: 0.5 ***/

/*** origin, low group: January-ranked quintile portfolios and excess returns ***/

/* Rank funds into quintiles on lag24alpha using January rows only.
   No BY statement is used, so the breakpoints pool all years. */
proc rank data=hf_performance_portfolio_low groups=5
          out=hf_performance_portfolio_low1;
    where month = 1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1. Only the merge keys and the quintile are kept. */
data hf_performance_portfolio_low1;
    set hf_performance_portfolio_low1(where=(lag24alpha_rank ne .));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
    keep fund_id year lag24alpha_rank1;
run;

proc sql;
    /* Spread each fund's January quintile over every month of that year.
       Funds without a January rank keep a missing quintile (left join). */
    create table hf_performance_portfolio_low as
    select a.*, b.lag24alpha_rank1
    from hf_performance_portfolio_low as a
    left join hf_performance_portfolio_low1 as b
        on a.fund_id = b.fund_id and a.year = b.year;

    /* Mean return per quintile-year-month, remerged onto every row. */
    create table hf_performance_portfolio_low as
    select *, mean(return) as meanreturn
    from hf_performance_portfolio_low
    group by lag24alpha_rank1, year, month;
quit;

/* Collapse to one row per quintile-year-month. The data set is named
   explicitly instead of relying on _LAST_. */
proc sort data=hf_performance_portfolio_low nodupkey;
    by lag24alpha_rank1 year month;
run;

/* Portfolio series: drop the factor columns carried in the fund file
   (presumably so the factor file's versions are used) and build the
   yyyymm join key. */
data hf_performance_portfolio_low10;
    set hf_performance_portfolio_low;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    date = year*100 + month;
run;

proc import datafile="d:\research\testosterone\7factors.csv"
            dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/* Attach the monthly factors by yyyymm. */
proc sql;
    create table hf_performance_portfolio_low10 as
    select *
    from hf_performance_portfolio_low10 as p
    inner join sevenfactors as f
        on p.date = f.yyyymm;
quit;

data hf_performance_portfolio_low10;
    set hf_performance_portfolio_low10;
    excessreturn = meanreturn - rf;
run;

/* Fix: sort BEFORE the BY-group procedures. The SQL join above has no
   ORDER BY, so its row order is not guaranteed, and BY processing stops
   with an error on unsorted input. The sort used to run only after the
   tests. */
proc sort data=hf_performance_portfolio_low10;
    by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_low10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* One-sample t test (default H0: mean = 0) of excess return per quintile. */
proc ttest data=hf_performance_portfolio_low10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* Two-sample t test of excess return between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_low10;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var excessreturn;
run;

proc export data=hf_performance_portfolio_low10
    outfile='d:\research\networking\hf_performance_portfolio_originlow10.csv'
    dbms=csv
    replace;
run;


/*** origin, med group: January-ranked quintile portfolios and excess returns ***/

/* Rank funds into quintiles on lag24alpha using January rows only.
   No BY statement is used, so the breakpoints pool all years. */
proc rank data=hf_performance_portfolio_med groups=5
          out=hf_performance_portfolio_med1;
    where month = 1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1. Only the merge keys and the quintile are kept. */
data hf_performance_portfolio_med1;
    set hf_performance_portfolio_med1(where=(lag24alpha_rank ne .));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
    keep fund_id year lag24alpha_rank1;
run;

proc sql;
    /* Spread each fund's January quintile over every month of that year.
       Funds without a January rank keep a missing quintile (left join). */
    create table hf_performance_portfolio_med as
    select a.*, b.lag24alpha_rank1
    from hf_performance_portfolio_med as a
    left join hf_performance_portfolio_med1 as b
        on a.fund_id = b.fund_id and a.year = b.year;

    /* Mean return per quintile-year-month, remerged onto every row. */
    create table hf_performance_portfolio_med as
    select *, mean(return) as meanreturn
    from hf_performance_portfolio_med
    group by lag24alpha_rank1, year, month;
quit;

/* Collapse to one row per quintile-year-month. The data set is named
   explicitly instead of relying on _LAST_. */
proc sort data=hf_performance_portfolio_med nodupkey;
    by lag24alpha_rank1 year month;
run;

/* Portfolio series: drop the factor columns carried in the fund file
   (presumably so the factor file's versions are used) and build the
   yyyymm join key. */
data hf_performance_portfolio_med10;
    set hf_performance_portfolio_med;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    date = year*100 + month;
run;

proc import datafile="d:\research\testosterone\7factors.csv"
            dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/* Attach the monthly factors by yyyymm. */
proc sql;
    create table hf_performance_portfolio_med10 as
    select *
    from hf_performance_portfolio_med10 as p
    inner join sevenfactors as f
        on p.date = f.yyyymm;
quit;

data hf_performance_portfolio_med10;
    set hf_performance_portfolio_med10;
    excessreturn = meanreturn - rf;
run;

/* Fix: sort BEFORE the BY-group procedures. The SQL join above has no
   ORDER BY, so its row order is not guaranteed, and BY processing stops
   with an error on unsorted input. The sort used to run only after the
   tests. */
proc sort data=hf_performance_portfolio_med10;
    by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_med10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* One-sample t test (default H0: mean = 0) of excess return per quintile. */
proc ttest data=hf_performance_portfolio_med10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* Two-sample t test of excess return between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_med10;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var excessreturn;
run;

proc export data=hf_performance_portfolio_med10
    outfile='d:\research\networking\hf_performance_portfolio_originmed10.csv'
    dbms=csv
    replace;
run;

/*** origin, high group: January-ranked quintile portfolios and excess returns ***/

/* Rank funds into quintiles on lag24alpha using January rows only.
   No BY statement is used, so the breakpoints pool all years. */
proc rank data=hf_performance_portfolio_high groups=5
          out=hf_performance_portfolio_high1;
    where month = 1;
    var lag24alpha;
    ranks lag24alpha_rank;
run;

/* PROC RANK labels the quintiles 0-4 (0 = smallest lag24alpha); 5 - rank
   relabels them 5..1. Only the merge keys and the quintile are kept. */
data hf_performance_portfolio_high1;
    set hf_performance_portfolio_high1(where=(lag24alpha_rank ne .));
    lag24alpha_rank1 = 5 - lag24alpha_rank;
    keep fund_id year lag24alpha_rank1;
run;

proc sql;
    /* Spread each fund's January quintile over every month of that year.
       Funds without a January rank keep a missing quintile (left join). */
    create table hf_performance_portfolio_high as
    select a.*, b.lag24alpha_rank1
    from hf_performance_portfolio_high as a
    left join hf_performance_portfolio_high1 as b
        on a.fund_id = b.fund_id and a.year = b.year;

    /* Mean return per quintile-year-month, remerged onto every row. */
    create table hf_performance_portfolio_high as
    select *, mean(return) as meanreturn
    from hf_performance_portfolio_high
    group by lag24alpha_rank1, year, month;
quit;

/* Collapse to one row per quintile-year-month. The data set is named
   explicitly instead of relying on _LAST_. */
proc sort data=hf_performance_portfolio_high nodupkey;
    by lag24alpha_rank1 year month;
run;

/* Portfolio series: drop the factor columns carried in the fund file
   (presumably so the factor file's versions are used) and build the
   yyyymm join key. */
data hf_performance_portfolio_high10;
    set hf_performance_portfolio_high;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    date = year*100 + month;
run;

proc import datafile="d:\research\testosterone\7factors.csv"
            dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/* Attach the monthly factors by yyyymm. */
proc sql;
    create table hf_performance_portfolio_high10 as
    select *
    from hf_performance_portfolio_high10 as p
    inner join sevenfactors as f
        on p.date = f.yyyymm;
quit;

data hf_performance_portfolio_high10;
    set hf_performance_portfolio_high10;
    excessreturn = meanreturn - rf;
run;

/* Fix: sort BEFORE the BY-group procedures. The SQL join above has no
   ORDER BY, so its row order is not guaranteed, and BY processing stops
   with an error on unsorted input. The sort used to run only after the
   tests. The data set stays sorted for the export that follows. */
proc sort data=hf_performance_portfolio_high10;
    by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_high10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* One-sample t test (default H0: mean = 0) of excess return per quintile. */
proc ttest data=hf_performance_portfolio_high10;
    by lag24alpha_rank1;
    var excessreturn;
run;

/* Two-sample t test of excess return between quintile 1 and quintile 5. */
proc ttest data=hf_performance_portfolio_high10;
    where lag24alpha_rank1 in (1, 5);
    class lag24alpha_rank1;
    var excessreturn;
run;

proc export data=hf_performance_portfolio_high10
outfile='d:\research\networking\hf_performance_portfolio_originhigh10.csv'
dbms=csv
replace;
run;



/*** gender ***/
/* Gender homophily: load the fund-month panel, winsorize return through %winsor
   (macro defined elsewhere), keep rows with totalcount>=2 after 1996, and split
   the panel into low / med / high groups on homo_gender_ratio. */
proc import datafile="d:\research\networking\hf_performance_portfolio_gender.csv" dbms=CSV out=hf_performance_portfolio replace;
  getnames=yes;
  guessingrows=50000;
run;
%winsor(dsetin=hf_performance_portfolio, byvar=none, vars=return, type=winsor, pctl=0.1 99);run;

data hf_performance_portfolio;
  set hf_performance_portfolio;
  where totalcount>=2 and year>1996;
run;
proc sort data=hf_performance_portfolio;
  by year month;
run;

/* high: ratio exactly 1. */
data hf_performance_portfolio_high;
  set hf_performance_portfolio;
  where homo_gender_ratio=1;
run;

/* Median of the ratio among rows below 1 (printed only; the 0.67 cut-off used
   below is hard-coded). */
proc means median data=hf_performance_portfolio;
  var homo_gender_ratio;
  where homo_gender_ratio<1;
run;

/* low: ratio below 0.67. */
data hf_performance_portfolio_low;
  set hf_performance_portfolio;
  where homo_gender_ratio<0.67;
run;

/* med: strictly between 0.67 and 1. The upper bound used to be <=1, which put
   every "high" row (ratio=1) into the med group as well; it is now <1 so the
   three groups are disjoint, as in the eth and major sections.
   NOTE(review): a ratio of exactly 0.67 lands in neither low nor med - confirm
   which side the boundary belongs to. */
data hf_performance_portfolio_med;
  set hf_performance_portfolio;
  where homo_gender_ratio>0.67 and homo_gender_ratio<1;
run;

/* Gender / low group: sort hf_performance_portfolio_low into lag24alpha quintiles.
   PROC RANK GROUPS=5 numbers the groups 0-4 in ascending order, so 5-rank makes
   1 the highest-lag24alpha group and 5 the lowest.
   NOTE(review): all January rows are ranked together (no BY year) - confirm. */
proc rank data=hf_performance_portfolio_low out=hf_performance_portfolio_low1 groups=5;
  where month=1;
  var lag24alpha;
  ranks lag24alpha_rank;
run;

data hf_performance_portfolio_low1;
  set hf_performance_portfolio_low1;
  where lag24alpha_rank not=.;
  lag24alpha_rank1=5-lag24alpha_rank;
  keep fund_id year lag24alpha_rank1;
run;

/* Put the fund-year quintile on every monthly row, then remerge the mean return
   of each quintile/year/month cell onto its rows. */
proc sql;
  create table hf_performance_portfolio_low as
  select *
  from hf_performance_portfolio_low as a
  left join hf_performance_portfolio_low1 as b
    on a.fund_id=b.fund_id and a.year=b.year;

  create table hf_performance_portfolio_low as
  select *, mean(return) as meanreturn
  from hf_performance_portfolio_low
  group by lag24alpha_rank1, year, month;
quit;

/* Keep one row per quintile/year/month. */
proc sort data=hf_performance_portfolio_low nodupkeys;
  by lag24alpha_rank1 year month;
run;

/* Remove the factor columns carried on the fund file (same-named columns are
   assumed to come back from 7factors.csv in the join below - TODO confirm) and
   build the yyyymm join key. */
data hf_performance_portfolio_low10;
  set hf_performance_portfolio_low;
  drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
  date=year*100+month;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
  getnames=yes;
  guessingrows=500;
run;

proc sql;
  create table hf_performance_portfolio_low10 as
  select *
  from hf_performance_portfolio_low10 as p
  inner join sevenfactors as f
    on p.date=f.yyyymm;
quit;

data hf_performance_portfolio_low10;
  set hf_performance_portfolio_low10;
  excessreturn=meanreturn-rf;
run;

/* Sort BEFORE the BY-group procedures: BY processing needs the data ordered by
   lag24alpha_rank1, and the row order left by the PROC SQL join is not guaranteed. */
proc sort data=hf_performance_portfolio_low10;
  by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_low10;
  by lag24alpha_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio_low10;
  by lag24alpha_rank1;
  var excessreturn;
run;
/* Two-sample comparison of quintile 1 against quintile 5. */
proc ttest data=hf_performance_portfolio_low10;
  where lag24alpha_rank1=1 or lag24alpha_rank1=5;
  class lag24alpha_rank1;
  var excessreturn;
run;

proc export data=hf_performance_portfolio_low10
  outfile='d:\research\networking\hf_performance_portfolio_genderlow10.csv'
  dbms=csv
  replace;
run;


/* Gender / med group: sort hf_performance_portfolio_med into lag24alpha quintiles.
   PROC RANK GROUPS=5 numbers the groups 0-4 in ascending order, so 5-rank makes
   1 the highest-lag24alpha group and 5 the lowest.
   NOTE(review): all January rows are ranked together (no BY year) - confirm. */
proc rank data=hf_performance_portfolio_med out=hf_performance_portfolio_med1 groups=5;
  where month=1;
  var lag24alpha;
  ranks lag24alpha_rank;
run;

data hf_performance_portfolio_med1;
  set hf_performance_portfolio_med1;
  where lag24alpha_rank not=.;
  lag24alpha_rank1=5-lag24alpha_rank;
  keep fund_id year lag24alpha_rank1;
run;

/* Put the fund-year quintile on every monthly row, then remerge the mean return
   of each quintile/year/month cell onto its rows. */
proc sql;
  create table hf_performance_portfolio_med as
  select *
  from hf_performance_portfolio_med as a
  left join hf_performance_portfolio_med1 as b
    on a.fund_id=b.fund_id and a.year=b.year;

  create table hf_performance_portfolio_med as
  select *, mean(return) as meanreturn
  from hf_performance_portfolio_med
  group by lag24alpha_rank1, year, month;
quit;

/* Keep one row per quintile/year/month. */
proc sort data=hf_performance_portfolio_med nodupkeys;
  by lag24alpha_rank1 year month;
run;

/* Remove the factor columns carried on the fund file (same-named columns are
   assumed to come back from 7factors.csv in the join below - TODO confirm) and
   build the yyyymm join key. */
data hf_performance_portfolio_med10;
  set hf_performance_portfolio_med;
  drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
  date=year*100+month;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
  getnames=yes;
  guessingrows=500;
run;

proc sql;
  create table hf_performance_portfolio_med10 as
  select *
  from hf_performance_portfolio_med10 as p
  inner join sevenfactors as f
    on p.date=f.yyyymm;
quit;

data hf_performance_portfolio_med10;
  set hf_performance_portfolio_med10;
  excessreturn=meanreturn-rf;
run;

/* Sort BEFORE the BY-group procedures: BY processing needs the data ordered by
   lag24alpha_rank1, and the row order left by the PROC SQL join is not guaranteed. */
proc sort data=hf_performance_portfolio_med10;
  by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_med10;
  by lag24alpha_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio_med10;
  by lag24alpha_rank1;
  var excessreturn;
run;
/* Two-sample comparison of quintile 1 against quintile 5. */
proc ttest data=hf_performance_portfolio_med10;
  where lag24alpha_rank1=1 or lag24alpha_rank1=5;
  class lag24alpha_rank1;
  var excessreturn;
run;

proc export data=hf_performance_portfolio_med10
  outfile='d:\research\networking\hf_performance_portfolio_gendermed10.csv'
  dbms=csv
  replace;
run;

/* Gender / high group: sort hf_performance_portfolio_high into lag24alpha quintiles.
   PROC RANK GROUPS=5 numbers the groups 0-4 in ascending order, so 5-rank makes
   1 the highest-lag24alpha group and 5 the lowest.
   NOTE(review): all January rows are ranked together (no BY year) - confirm. */
proc rank data=hf_performance_portfolio_high out=hf_performance_portfolio_high1 groups=5;
  where month=1;
  var lag24alpha;
  ranks lag24alpha_rank;
run;

data hf_performance_portfolio_high1;
  set hf_performance_portfolio_high1;
  where lag24alpha_rank not=.;
  lag24alpha_rank1=5-lag24alpha_rank;
  keep fund_id year lag24alpha_rank1;
run;

/* Put the fund-year quintile on every monthly row, then remerge the mean return
   of each quintile/year/month cell onto its rows. */
proc sql;
  create table hf_performance_portfolio_high as
  select *
  from hf_performance_portfolio_high as a
  left join hf_performance_portfolio_high1 as b
    on a.fund_id=b.fund_id and a.year=b.year;

  create table hf_performance_portfolio_high as
  select *, mean(return) as meanreturn
  from hf_performance_portfolio_high
  group by lag24alpha_rank1, year, month;
quit;

/* Keep one row per quintile/year/month. */
proc sort data=hf_performance_portfolio_high nodupkeys;
  by lag24alpha_rank1 year month;
run;

/* Remove the factor columns carried on the fund file (same-named columns are
   assumed to come back from 7factors.csv in the join below - TODO confirm) and
   build the yyyymm join key. */
data hf_performance_portfolio_high10;
  set hf_performance_portfolio_high;
  drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
  date=year*100+month;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
  getnames=yes;
  guessingrows=500;
run;

proc sql;
  create table hf_performance_portfolio_high10 as
  select *
  from hf_performance_portfolio_high10 as p
  inner join sevenfactors as f
    on p.date=f.yyyymm;
quit;

data hf_performance_portfolio_high10;
  set hf_performance_portfolio_high10;
  excessreturn=meanreturn-rf;
run;

/* Sort BEFORE the BY-group procedures: BY processing needs the data ordered by
   lag24alpha_rank1, and the row order left by the PROC SQL join is not guaranteed. */
proc sort data=hf_performance_portfolio_high10;
  by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_high10;
  by lag24alpha_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio_high10;
  by lag24alpha_rank1;
  var excessreturn;
run;
/* Two-sample comparison of quintile 1 against quintile 5. */
proc ttest data=hf_performance_portfolio_high10;
  where lag24alpha_rank1=1 or lag24alpha_rank1=5;
  class lag24alpha_rank1;
  var excessreturn;
run;

proc export data=hf_performance_portfolio_high10
  outfile='d:\research\networking\hf_performance_portfolio_genderhigh10.csv'
  dbms=csv
  replace;
run;



/*** eth ***/
/* Ethnicity homophily: load the fund-month panel, winsorize return through
   %winsor (macro defined elsewhere), then split the panel on homoeth_ratio. */
proc import out=hf_performance_portfolio
  datafile="d:\research\networking\hf_performance_portfolio_eth.csv"
  dbms=CSV replace;
  guessingrows=50000;
  getnames=yes;
run;
%winsor(dsetin=hf_performance_portfolio, byvar=none, vars=return, type=winsor, pctl=0.1 99);run;

/* Rows with totalmanager>=2 after 1996, without the mean* columns that came in
   with the file; ordered by year and month. */
data hf_performance_portfolio;
  set hf_performance_portfolio;
  where totalmanager>=2 and year>1996;
  drop meanreturn meanalpha meanflow;
run;
proc sort data=hf_performance_portfolio;
  by year month;
run;

/* low: ratio exactly 0. */
data hf_performance_portfolio_low;
  set hf_performance_portfolio;
  where homoeth_ratio=0;
run;

/* Median of the ratio among rows above 0 (printed only). */
proc means data=hf_performance_portfolio median;
  where homoeth_ratio>0;
  var homoeth_ratio;
run;

/* high: ratio exactly 1. */
data hf_performance_portfolio_high;
  set hf_performance_portfolio;
  where homoeth_ratio=1;
run;

/* med: strictly between 0 and 1. */
data hf_performance_portfolio_med;
  set hf_performance_portfolio;
  where 0<homoeth_ratio and homoeth_ratio<1;
run;

/* Eth / low group: sort hf_performance_portfolio_low into lag24alpha quintiles.
   PROC RANK GROUPS=5 numbers the groups 0-4 in ascending order, so 5-rank makes
   1 the highest-lag24alpha group and 5 the lowest.
   NOTE(review): all January rows are ranked together (no BY year) - confirm. */
proc rank data=hf_performance_portfolio_low out=hf_performance_portfolio_low1 groups=5;
  where month=1;
  var lag24alpha;
  ranks lag24alpha_rank;
run;

data hf_performance_portfolio_low1;
  set hf_performance_portfolio_low1;
  where lag24alpha_rank not=.;
  lag24alpha_rank1=5-lag24alpha_rank;
  keep fund_id year lag24alpha_rank1;
run;

/* Put the fund-year quintile on every monthly row, then remerge the mean return
   of each quintile/year/month cell onto its rows. */
proc sql;
  create table hf_performance_portfolio_low as
  select *
  from hf_performance_portfolio_low as a
  left join hf_performance_portfolio_low1 as b
    on a.fund_id=b.fund_id and a.year=b.year;

  create table hf_performance_portfolio_low as
  select *, mean(return) as meanreturn
  from hf_performance_portfolio_low
  group by lag24alpha_rank1, year, month;
quit;

/* Keep one row per quintile/year/month. */
proc sort data=hf_performance_portfolio_low nodupkeys;
  by lag24alpha_rank1 year month;
run;

/* Remove the factor columns carried on the fund file (same-named columns are
   assumed to come back from 7factors.csv in the join below - TODO confirm) and
   build the yyyymm join key. */
data hf_performance_portfolio_low10;
  set hf_performance_portfolio_low;
  drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
  date=year*100+month;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
  getnames=yes;
  guessingrows=500;
run;

proc sql;
  create table hf_performance_portfolio_low10 as
  select *
  from hf_performance_portfolio_low10 as p
  inner join sevenfactors as f
    on p.date=f.yyyymm;
quit;

data hf_performance_portfolio_low10;
  set hf_performance_portfolio_low10;
  excessreturn=meanreturn-rf;
run;

/* Sort BEFORE the BY-group procedures: BY processing needs the data ordered by
   lag24alpha_rank1, and the row order left by the PROC SQL join is not guaranteed. */
proc sort data=hf_performance_portfolio_low10;
  by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_low10;
  by lag24alpha_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio_low10;
  by lag24alpha_rank1;
  var excessreturn;
run;
/* Two-sample comparison of quintile 1 against quintile 5. */
proc ttest data=hf_performance_portfolio_low10;
  where lag24alpha_rank1=1 or lag24alpha_rank1=5;
  class lag24alpha_rank1;
  var excessreturn;
run;

proc export data=hf_performance_portfolio_low10
  outfile='d:\research\networking\hf_performance_portfolio_ethlow10.csv'
  dbms=csv
  replace;
run;


/* Eth / med group: sort hf_performance_portfolio_med into lag24alpha quintiles.
   PROC RANK GROUPS=5 numbers the groups 0-4 in ascending order, so 5-rank makes
   1 the highest-lag24alpha group and 5 the lowest.
   NOTE(review): all January rows are ranked together (no BY year) - confirm. */
proc rank data=hf_performance_portfolio_med out=hf_performance_portfolio_med1 groups=5;
  where month=1;
  var lag24alpha;
  ranks lag24alpha_rank;
run;

data hf_performance_portfolio_med1;
  set hf_performance_portfolio_med1;
  where lag24alpha_rank not=.;
  lag24alpha_rank1=5-lag24alpha_rank;
  keep fund_id year lag24alpha_rank1;
run;

/* Put the fund-year quintile on every monthly row, then remerge the mean return
   of each quintile/year/month cell onto its rows. */
proc sql;
  create table hf_performance_portfolio_med as
  select *
  from hf_performance_portfolio_med as a
  left join hf_performance_portfolio_med1 as b
    on a.fund_id=b.fund_id and a.year=b.year;

  create table hf_performance_portfolio_med as
  select *, mean(return) as meanreturn
  from hf_performance_portfolio_med
  group by lag24alpha_rank1, year, month;
quit;

/* Keep one row per quintile/year/month. */
proc sort data=hf_performance_portfolio_med nodupkeys;
  by lag24alpha_rank1 year month;
run;

/* Remove the factor columns carried on the fund file (same-named columns are
   assumed to come back from 7factors.csv in the join below - TODO confirm) and
   build the yyyymm join key. */
data hf_performance_portfolio_med10;
  set hf_performance_portfolio_med;
  drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
  date=year*100+month;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
  getnames=yes;
  guessingrows=500;
run;

proc sql;
  create table hf_performance_portfolio_med10 as
  select *
  from hf_performance_portfolio_med10 as p
  inner join sevenfactors as f
    on p.date=f.yyyymm;
quit;

data hf_performance_portfolio_med10;
  set hf_performance_portfolio_med10;
  excessreturn=meanreturn-rf;
run;

/* Sort BEFORE the BY-group procedures: BY processing needs the data ordered by
   lag24alpha_rank1, and the row order left by the PROC SQL join is not guaranteed. */
proc sort data=hf_performance_portfolio_med10;
  by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_med10;
  by lag24alpha_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio_med10;
  by lag24alpha_rank1;
  var excessreturn;
run;
/* Two-sample comparison of quintile 1 against quintile 5. */
proc ttest data=hf_performance_portfolio_med10;
  where lag24alpha_rank1=1 or lag24alpha_rank1=5;
  class lag24alpha_rank1;
  var excessreturn;
run;

proc export data=hf_performance_portfolio_med10
  outfile='d:\research\networking\hf_performance_portfolio_ethmed10.csv'
  dbms=csv
  replace;
run;

/* Eth / high group: sort hf_performance_portfolio_high into lag24alpha quintiles.
   PROC RANK GROUPS=5 numbers the groups 0-4 in ascending order, so 5-rank makes
   1 the highest-lag24alpha group and 5 the lowest.
   NOTE(review): all January rows are ranked together (no BY year) - confirm. */
proc rank data=hf_performance_portfolio_high out=hf_performance_portfolio_high1 groups=5;
  where month=1;
  var lag24alpha;
  ranks lag24alpha_rank;
run;

data hf_performance_portfolio_high1;
  set hf_performance_portfolio_high1;
  where lag24alpha_rank not=.;
  lag24alpha_rank1=5-lag24alpha_rank;
  keep fund_id year lag24alpha_rank1;
run;

/* Put the fund-year quintile on every monthly row, then remerge the mean return
   of each quintile/year/month cell onto its rows. */
proc sql;
  create table hf_performance_portfolio_high as
  select *
  from hf_performance_portfolio_high as a
  left join hf_performance_portfolio_high1 as b
    on a.fund_id=b.fund_id and a.year=b.year;

  create table hf_performance_portfolio_high as
  select *, mean(return) as meanreturn
  from hf_performance_portfolio_high
  group by lag24alpha_rank1, year, month;
quit;

/* Keep one row per quintile/year/month. */
proc sort data=hf_performance_portfolio_high nodupkeys;
  by lag24alpha_rank1 year month;
run;

/* Remove the factor columns carried on the fund file (same-named columns are
   assumed to come back from 7factors.csv in the join below - TODO confirm) and
   build the yyyymm join key. */
data hf_performance_portfolio_high10;
  set hf_performance_portfolio_high;
  drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
  date=year*100+month;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
  getnames=yes;
  guessingrows=500;
run;

proc sql;
  create table hf_performance_portfolio_high10 as
  select *
  from hf_performance_portfolio_high10 as p
  inner join sevenfactors as f
    on p.date=f.yyyymm;
quit;

data hf_performance_portfolio_high10;
  set hf_performance_portfolio_high10;
  excessreturn=meanreturn-rf;
run;

/* Sort BEFORE the BY-group procedures: BY processing needs the data ordered by
   lag24alpha_rank1, and the row order left by the PROC SQL join is not guaranteed. */
proc sort data=hf_performance_portfolio_high10;
  by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_high10;
  by lag24alpha_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio_high10;
  by lag24alpha_rank1;
  var excessreturn;
run;
/* Two-sample comparison of quintile 1 against quintile 5. */
proc ttest data=hf_performance_portfolio_high10;
  where lag24alpha_rank1=1 or lag24alpha_rank1=5;
  class lag24alpha_rank1;
  var excessreturn;
run;

proc export data=hf_performance_portfolio_high10
  outfile='d:\research\networking\hf_performance_portfolio_ethhigh10.csv'
  dbms=csv
  replace;
run;


/*** major ***/
/* Major homophily: load the fund-month panel, keep rows with totalmanager>=2
   after 1996, and split the panel into low / med / high on homo_major_ratio.
   NOTE(review): unlike the gender and eth sections, return is not passed through
   %winsor here - confirm that is intended. */
proc import datafile="d:\research\networking\hf_performance_portfolio_major.csv" dbms=CSV out=hf_performance_portfolio replace;
  getnames=yes;
  guessingrows=50000;
run;

data hf_performance_portfolio;
  set hf_performance_portfolio;
  where totalmanager>=2 and year>1996;
run;
proc sort data=hf_performance_portfolio;
  by year month;
run;
data hf_performance_portfolio;
  set hf_performance_portfolio;
  drop meanreturn meanalpha meanflow;
run;

/* low: ratio exactly 0. */
data hf_performance_portfolio_low;
  set hf_performance_portfolio;
  where homo_major_ratio=0;
run;

/* Median of the ratio among rows above 0 (printed only). The filter used to
   reference homoeth_ratio, a leftover from the eth section; it now filters on
   the variable being summarized. */
proc means median data=hf_performance_portfolio;
  var homo_major_ratio;
  where homo_major_ratio>0;
run;

/* high: ratio exactly 1. */
data hf_performance_portfolio_high;
  set hf_performance_portfolio;
  where homo_major_ratio=1;
run;

/* med: strictly between 0 and 1. */
data hf_performance_portfolio_med;
  set hf_performance_portfolio;
  where homo_major_ratio>0 and homo_major_ratio<1;
run;

/* Major / low group: sort hf_performance_portfolio_low into lag24alpha quintiles.
   PROC RANK GROUPS=5 numbers the groups 0-4 in ascending order, so 5-rank makes
   1 the highest-lag24alpha group and 5 the lowest.
   NOTE(review): all January rows are ranked together (no BY year) - confirm. */
proc rank data=hf_performance_portfolio_low out=hf_performance_portfolio_low1 groups=5;
  where month=1;
  var lag24alpha;
  ranks lag24alpha_rank;
run;

data hf_performance_portfolio_low1;
  set hf_performance_portfolio_low1;
  where lag24alpha_rank not=.;
  lag24alpha_rank1=5-lag24alpha_rank;
  keep fund_id year lag24alpha_rank1;
run;

/* Put the fund-year quintile on every monthly row, then remerge the mean return
   of each quintile/year/month cell onto its rows. */
proc sql;
  create table hf_performance_portfolio_low as
  select *
  from hf_performance_portfolio_low as a
  left join hf_performance_portfolio_low1 as b
    on a.fund_id=b.fund_id and a.year=b.year;

  create table hf_performance_portfolio_low as
  select *, mean(return) as meanreturn
  from hf_performance_portfolio_low
  group by lag24alpha_rank1, year, month;
quit;

/* Keep one row per quintile/year/month. */
proc sort data=hf_performance_portfolio_low nodupkeys;
  by lag24alpha_rank1 year month;
run;

/* Remove the factor columns carried on the fund file (same-named columns are
   assumed to come back from 7factors.csv in the join below - TODO confirm) and
   build the yyyymm join key. */
data hf_performance_portfolio_low10;
  set hf_performance_portfolio_low;
  drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
  date=year*100+month;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
  getnames=yes;
  guessingrows=500;
run;

proc sql;
  create table hf_performance_portfolio_low10 as
  select *
  from hf_performance_portfolio_low10 as p
  inner join sevenfactors as f
    on p.date=f.yyyymm;
quit;

data hf_performance_portfolio_low10;
  set hf_performance_portfolio_low10;
  excessreturn=meanreturn-rf;
run;

/* Sort BEFORE the BY-group procedures: BY processing needs the data ordered by
   lag24alpha_rank1, and the row order left by the PROC SQL join is not guaranteed. */
proc sort data=hf_performance_portfolio_low10;
  by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_low10;
  by lag24alpha_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio_low10;
  by lag24alpha_rank1;
  var excessreturn;
run;
/* Two-sample comparison of quintile 1 against quintile 5. */
proc ttest data=hf_performance_portfolio_low10;
  where lag24alpha_rank1=1 or lag24alpha_rank1=5;
  class lag24alpha_rank1;
  var excessreturn;
run;

proc export data=hf_performance_portfolio_low10
  outfile='d:\research\networking\hf_performance_portfolio_majorlow10.csv'
  dbms=csv
  replace;
run;


/* Major / med group: sort hf_performance_portfolio_med into lag24alpha quintiles.
   PROC RANK GROUPS=5 numbers the groups 0-4 in ascending order, so 5-rank makes
   1 the highest-lag24alpha group and 5 the lowest.
   NOTE(review): all January rows are ranked together (no BY year) - confirm. */
proc rank data=hf_performance_portfolio_med out=hf_performance_portfolio_med1 groups=5;
  where month=1;
  var lag24alpha;
  ranks lag24alpha_rank;
run;

data hf_performance_portfolio_med1;
  set hf_performance_portfolio_med1;
  where lag24alpha_rank not=.;
  lag24alpha_rank1=5-lag24alpha_rank;
  keep fund_id year lag24alpha_rank1;
run;

/* Put the fund-year quintile on every monthly row, then remerge the mean return
   of each quintile/year/month cell onto its rows. */
proc sql;
  create table hf_performance_portfolio_med as
  select *
  from hf_performance_portfolio_med as a
  left join hf_performance_portfolio_med1 as b
    on a.fund_id=b.fund_id and a.year=b.year;

  create table hf_performance_portfolio_med as
  select *, mean(return) as meanreturn
  from hf_performance_portfolio_med
  group by lag24alpha_rank1, year, month;
quit;

/* Keep one row per quintile/year/month. */
proc sort data=hf_performance_portfolio_med nodupkeys;
  by lag24alpha_rank1 year month;
run;

/* Remove the factor columns carried on the fund file (same-named columns are
   assumed to come back from 7factors.csv in the join below - TODO confirm) and
   build the yyyymm join key. */
data hf_performance_portfolio_med10;
  set hf_performance_portfolio_med;
  drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
  date=year*100+month;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
  getnames=yes;
  guessingrows=500;
run;

proc sql;
  create table hf_performance_portfolio_med10 as
  select *
  from hf_performance_portfolio_med10 as p
  inner join sevenfactors as f
    on p.date=f.yyyymm;
quit;

data hf_performance_portfolio_med10;
  set hf_performance_portfolio_med10;
  excessreturn=meanreturn-rf;
run;

/* Sort BEFORE the BY-group procedures: BY processing needs the data ordered by
   lag24alpha_rank1, and the row order left by the PROC SQL join is not guaranteed. */
proc sort data=hf_performance_portfolio_med10;
  by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_med10;
  by lag24alpha_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio_med10;
  by lag24alpha_rank1;
  var excessreturn;
run;
/* Two-sample comparison of quintile 1 against quintile 5. */
proc ttest data=hf_performance_portfolio_med10;
  where lag24alpha_rank1=1 or lag24alpha_rank1=5;
  class lag24alpha_rank1;
  var excessreturn;
run;

proc export data=hf_performance_portfolio_med10
  outfile='d:\research\networking\hf_performance_portfolio_majormed10.csv'
  dbms=csv
  replace;
run;

/* Major / high group: sort hf_performance_portfolio_high into lag24alpha quintiles.
   PROC RANK GROUPS=5 numbers the groups 0-4 in ascending order, so 5-rank makes
   1 the highest-lag24alpha group and 5 the lowest.
   NOTE(review): all January rows are ranked together (no BY year) - confirm. */
proc rank data=hf_performance_portfolio_high out=hf_performance_portfolio_high1 groups=5;
  where month=1;
  var lag24alpha;
  ranks lag24alpha_rank;
run;

data hf_performance_portfolio_high1;
  set hf_performance_portfolio_high1;
  where lag24alpha_rank not=.;
  lag24alpha_rank1=5-lag24alpha_rank;
  keep fund_id year lag24alpha_rank1;
run;

/* Put the fund-year quintile on every monthly row, then remerge the mean return
   of each quintile/year/month cell onto its rows. */
proc sql;
  create table hf_performance_portfolio_high as
  select *
  from hf_performance_portfolio_high as a
  left join hf_performance_portfolio_high1 as b
    on a.fund_id=b.fund_id and a.year=b.year;

  create table hf_performance_portfolio_high as
  select *, mean(return) as meanreturn
  from hf_performance_portfolio_high
  group by lag24alpha_rank1, year, month;
quit;

/* Keep one row per quintile/year/month. */
proc sort data=hf_performance_portfolio_high nodupkeys;
  by lag24alpha_rank1 year month;
run;

/* Remove the factor columns carried on the fund file (same-named columns are
   assumed to come back from 7factors.csv in the join below - TODO confirm) and
   build the yyyymm join key. */
data hf_performance_portfolio_high10;
  set hf_performance_portfolio_high;
  drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
  date=year*100+month;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
  getnames=yes;
  guessingrows=500;
run;

proc sql;
  create table hf_performance_portfolio_high10 as
  select *
  from hf_performance_portfolio_high10 as p
  inner join sevenfactors as f
    on p.date=f.yyyymm;
quit;

data hf_performance_portfolio_high10;
  set hf_performance_portfolio_high10;
  excessreturn=meanreturn-rf;
run;

/* Sort BEFORE the BY-group procedures: BY processing needs the data ordered by
   lag24alpha_rank1, and the row order left by the PROC SQL join is not guaranteed. */
proc sort data=hf_performance_portfolio_high10;
  by lag24alpha_rank1;
run;

proc means data=hf_performance_portfolio_high10;
  by lag24alpha_rank1;
  var excessreturn;
run;
proc ttest data=hf_performance_portfolio_high10;
  by lag24alpha_rank1;
  var excessreturn;
run;
/* Two-sample comparison of quintile 1 against quintile 5. */
proc ttest data=hf_performance_portfolio_high10;
  where lag24alpha_rank1=1 or lag24alpha_rank1=5;
  class lag24alpha_rank1;
  var excessreturn;
run;

proc export data=hf_performance_portfolio_high10
  outfile='d:\research\networking\hf_performance_portfolio_majorhigh10.csv'
  dbms=csv
  replace;
run;



/*** anomaly loadings ***/
/*** edu ***/
/* Education homophily x anomaly data: assign every fund-month a homoedu_rank1 of
   1-5, attach anomaly11 by month, and export one file per rank plus the full panel. */
proc import out=anomaly11
  datafile="d:\research\networking\anomaly11.csv"
  dbms=CSV replace;
  guessingrows=5000;
  getnames=yes;
run;
proc import out=hf_performance_portfolio
  datafile="d:\research\networking\hf_performance_portfolio.csv"
  dbms=CSV replace;
  guessingrows=50000;
  getnames=yes;
run;

/* Rows with totalmanager>=2, without the rank/mean columns that came in with the
   file; ordered by year and month (PROC RANK below runs BY year). */
data hf_performance_portfolio;
  set hf_performance_portfolio;
  where totalmanager>=2;
  drop homoedu_rank meanreturn meanalpha meanflow;
run;
proc sort data=hf_performance_portfolio;
  by year month;
run;

/* Yearly terciles (0-2) of homo_edu_ratio over January rows with a non-missing
   return_lead12, leaving out ratios of exactly 0 or 1; 4-rank maps them to 4, 3, 2. */
proc rank data=hf_performance_portfolio out=hf_performance_portfolio1 groups=3;
  by year;
  where month=1 and return_lead12 not=. and homo_edu_ratio not=1 and homo_edu_ratio not=0;
  var homo_edu_ratio;
  ranks homoedu_rank;
run;
data hf_performance_portfolio1;
  set hf_performance_portfolio1;
  homoedu_rank1=4-homoedu_rank;
  keep fund_id year homoedu_rank1;
run;

proc sql;
  create table hf_performance_portfolio as
  select *
  from hf_performance_portfolio as a
  left join hf_performance_portfolio1 as b
    on a.fund_id=b.fund_id and a.year=b.year;
quit;

/* Ratio 1 becomes rank 1 and ratio 0 becomes rank 5; rows that still have no
   rank are removed and count=1 is added for the per-fund totals below. */
data hf_performance_portfolio;
  set hf_performance_portfolio;
  if homo_edu_ratio=1 then homoedu_rank1=1;
  if homo_edu_ratio=0 then homoedu_rank1=5;
  if homoedu_rank1 ne .;
  count=1;
run;

/* Attach the anomaly11 columns by month (date = yyyymm). */
proc sql;
  create table hf_performance_portfolio as
  select *
  from hf_performance_portfolio as p
  inner join anomaly11 as f
    on p.date=f.yyyymm;
quit;

/* One dataset per rank, written in a single pass. */
data hf_performance_portfolio_rank1
     hf_performance_portfolio_rank2
     hf_performance_portfolio_rank3
     hf_performance_portfolio_rank4
     hf_performance_portfolio_rank5;
  set hf_performance_portfolio;
  select (homoedu_rank1);
    when (1) output hf_performance_portfolio_rank1;
    when (2) output hf_performance_portfolio_rank2;
    when (3) output hf_performance_portfolio_rank3;
    when (4) output hf_performance_portfolio_rank4;
    when (5) output hf_performance_portfolio_rank5;
    otherwise;
  end;
run;

/* totalcount = number of rows per fund_id within each dataset (remerged). */
proc sql;
  create table hf_performance_portfolio_rank1 as
  select *, sum(count) as totalcount
  from hf_performance_portfolio_rank1
  group by fund_id;

  create table hf_performance_portfolio_rank2 as
  select *, sum(count) as totalcount
  from hf_performance_portfolio_rank2
  group by fund_id;

  create table hf_performance_portfolio_rank3 as
  select *, sum(count) as totalcount
  from hf_performance_portfolio_rank3
  group by fund_id;

  create table hf_performance_portfolio_rank4 as
  select *, sum(count) as totalcount
  from hf_performance_portfolio_rank4
  group by fund_id;

  create table hf_performance_portfolio_rank5 as
  select *, sum(count) as totalcount
  from hf_performance_portfolio_rank5
  group by fund_id;

  create table hf_performance_portfolio as
  select *, sum(count) as totalcount
  from hf_performance_portfolio
  group by fund_id;
quit;

proc export data=hf_performance_portfolio_rank1 dbms=csv replace
  outfile='d:\research\networking\hf_performance_portfolio_edurank1.csv';
run;
proc export data=hf_performance_portfolio_rank2 dbms=csv replace
  outfile='d:\research\networking\hf_performance_portfolio_edurank2.csv';
run;
proc export data=hf_performance_portfolio_rank3 dbms=csv replace
  outfile='d:\research\networking\hf_performance_portfolio_edurank3.csv';
run;
proc export data=hf_performance_portfolio_rank4 dbms=csv replace
  outfile='d:\research\networking\hf_performance_portfolio_edurank4.csv';
run;
proc export data=hf_performance_portfolio_rank5 dbms=csv replace
  outfile='d:\research\networking\hf_performance_portfolio_edurank5.csv';
run;
proc export data=hf_performance_portfolio dbms=csv replace
  outfile='d:\research\networking\hf_performance_portfolio_edu.csv';
run;

/* Import d:\research\networking\edurank1.csv ... edurank5.csv into the datasets
   edurank1-edurank5. */
%macro readraw;
   /* Declare the counter local so the loop cannot overwrite an &i that already
      exists in an enclosing or global scope. */
   %local i;
   %do i=1 %to 5;
      proc import out=edurank&i
        datafile="d:\research\networking\edurank&i..csv"
        dbms=csv replace;
        getnames=yes;
        guessingrows=50000;
      run;
   %end;
%mend readraw;
%readraw;
/* Reduce edurank1-edurank5 to b1-b11, se1-se11, homoedu_rank1, fund_id and date. */
%macro keepvar;
   /* Declare the counter local so the loop cannot overwrite an &i that already
      exists in an enclosing or global scope. */
   %local i;
   %do i=1 %to 5;
      data edurank&i;
        set edurank&i;
        keep b1-b11 se1-se11 homoedu_rank1 fund_id date;
      run;
   %end;
%mend keepvar;
%keepvar;

/* Keep one row per fund_id in each of edurank1-edurank5 (sorted in place). */
%macro deletedup;
   /* Declare the counter local so the loop cannot overwrite an &i that already
      exists in an enclosing or global scope. */
   %local i;
   %do i=1 %to 5;
      proc sort data=edurank&i nodupkeys;
        by fund_id;
      run;
   %end;
%mend deletedup;
%deletedup;

/* Stack edurank1-edurank5 and, for each of the 11 coefficients, compute
   t = b/se and flag sig = 1 when t >= 1.96 (0 otherwise, including a missing t);
   sumsig is the number of flags per row.
   NOTE(review): only positive t-values are flagged; t <= -1.96 counts as 0 -
   confirm a one-sided count is intended. */
data edurank;
  set edurank1-edurank5;
  array _b{11}   b1-b11;
  array _se{11}  se1-se11;
  array _t{11}   t1-t11;
  array _sig{11} sig1-sig11;
  do _k=1 to 11;
    _t{_k}=_b{_k}/_se{_k};
    _sig{_k}=(_t{_k}>=1.96);
  end;
  sumsig=sum(of sig1-sig11);
  drop _k;
run;

/* BY-group processing needs the rows ordered by homoedu_rank1; the stacked
   files are only sorted by fund_id, so sort explicitly before the procedures. */
proc sort data=edurank;
  by homoedu_rank1;
run;

proc means data=edurank;
  by homoedu_rank1;
  var sumsig;
run;
proc ttest data=edurank;
  by homoedu_rank1;
  var sumsig;
run;
/* Two-sample comparison of rank 1 against rank 5. */
proc ttest data=edurank;
  where homoedu_rank1=1 or homoedu_rank1=5;
  class homoedu_rank1;
  var sumsig;
run;


/*** work ***/
proc import datafile="d:\research\networking\anomaly11.csv" dbms=CSV out=anomaly11 replace;
getnames=yes;
guessingrows=5000;
run;
proc import datafile="d:\research\networking\hf_performance_portfolio.csv" dbms=CSV out=hf_performance_portfolio replace;
getnames=yes;
guessingrows=50000;
run;
data hf_performance_portfolio;
set hf_performance_portfolio;
where totalmanager>=2;
run;
proc sort data=hf_performance_portfolio;
by year month;
run;
data hf_performance_portfolio;
set hf_performance_portfolio;
drop homoedu_rank meanreturn meanalpha meanflow;
run;
proc rank groups=3 data=hf_performance_portfolio out=hf_performance_portfolio1; 
var homo_work_ratio;
ranks homowork_rank;
/*** where month=1; ***/
where month=1 & return_lead12 not=. and homo_work_ratio not=1 and homo_work_ratio not=0; 
by year;
run;
data hf_performance_portfolio1;
set hf_performance_portfolio1;
homowork_rank1=4-homowork_rank;
keep fund_id year homowork_rank1;
run;
PROC SQL; 
CREATE TABLE hf_performance_portfolio AS
SELECT *
FROM hf_performance_portfolio AS a LEFT JOIN hf_performance_portfolio1 AS b
ON a.fund_id=b.fund_id and a.year=b.year;
QUIT;
data hf_performance_portfolio;
set hf_performance_portfolio;
if homo_work_ratio=1 then homowork_rank1=1;
if homo_work_ratio=0 then homowork_rank1=5;
run;
data hf_performance_portfolio;
set hf_performance_portfolio;
where homowork_rank1 not=.;
count=1;
run;

/* Add the columns of anomaly11 to each row whose date equals anomaly11.yyyymm. */
proc sql;
   create table hf_performance_portfolio as
   select *
   from hf_performance_portfolio as p
        inner join anomaly11 as f
        on p.date = f.yyyymm;
quit;

/* Route every row to the dataset of its portfolio label (1-5). */
data hf_performance_portfolio_rank1
     hf_performance_portfolio_rank2
     hf_performance_portfolio_rank3
     hf_performance_portfolio_rank4
     hf_performance_portfolio_rank5;
   set hf_performance_portfolio;
   select (homowork_rank1);
      when (1) output hf_performance_portfolio_rank1;
      when (2) output hf_performance_portfolio_rank2;
      when (3) output hf_performance_portfolio_rank3;
      when (4) output hf_performance_portfolio_rank4;
      when (5) output hf_performance_portfolio_rank5;
      otherwise;
   end;
run;

/* totalcount = number of rows per fund_id within each table
   (the sum of the unit counter is remerged onto every row). */
proc sql;
   create table hf_performance_portfolio_rank1 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank1
      group by fund_id;

   create table hf_performance_portfolio_rank2 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank2
      group by fund_id;

   create table hf_performance_portfolio_rank3 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank3
      group by fund_id;

   create table hf_performance_portfolio_rank4 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank4
      group by fund_id;

   create table hf_performance_portfolio_rank5 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank5
      group by fund_id;

   create table hf_performance_portfolio as
      select *, sum(count) as totalcount
      from hf_performance_portfolio
      group by fund_id;
quit;

/* Write the five work-homophily portfolios and the pooled table to CSV. */
proc export data=hf_performance_portfolio_rank1 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_workrank1.csv';
run;

proc export data=hf_performance_portfolio_rank2 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_workrank2.csv';
run;

proc export data=hf_performance_portfolio_rank3 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_workrank3.csv';
run;

proc export data=hf_performance_portfolio_rank4 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_workrank4.csv';
run;

proc export data=hf_performance_portfolio_rank5 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_workrank5.csv';
run;

proc export data=hf_performance_portfolio dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_work.csv';
run;

/* Import workrank1.csv ... workrank5.csv as datasets workrank1-workrank5.
   NOTE(review): these files are not written by the visible code; presumably
   they hold per-fund coefficient estimates produced elsewhere - confirm. */
%macro readraw;
   %do i = 1 %to 5;
      proc import datafile="d:\research\networking\workrank&i..csv"
                  out=workrank&i
                  dbms=csv
                  replace;
         guessingrows=50000;
         getnames=yes;
      run;
   %end;
%mend readraw;
%readraw;
run;

/* Reduce each imported table to coefficients, standard errors and identifiers. */
%macro keepvar;
   %do i = 1 %to 5;
      data workrank&i(keep=b1-b11 se1-se11 homowork_rank1 fund_id date);
         set workrank&i;
      run;
   %end;
%mend keepvar;
%keepvar;
run;

/* Keep one row per fund_id in each table. */
%macro deletedup;
   %do i = 1 %to 5;
      proc sort data=workrank&i nodupkey;
         by fund_id;
      run;
   %end;
%mend deletedup;
%deletedup;
run;

/* Stack the five portfolio tables, form a t-ratio for each of the 11
   coefficients, flag ratios of at least 1.96, and count the flags in sumsig. */
data workrank;
   set workrank1-workrank5;

   t1 = b1 / se1;   t2  = b2  / se2;   t3  = b3  / se3;   t4 = b4 / se4;
   t5 = b5 / se5;   t6  = b6  / se6;   t7  = b7  / se7;   t8 = b8 / se8;
   t9 = b9 / se9;   t10 = b10 / se10;  t11 = b11 / se11;

   sig1 = (t1 >= 1.96);   sig2  = (t2  >= 1.96);   sig3  = (t3  >= 1.96);   sig4 = (t4 >= 1.96);
   sig5 = (t5 >= 1.96);   sig6  = (t6  >= 1.96);   sig7  = (t7  >= 1.96);   sig8 = (t8 >= 1.96);
   sig9 = (t9 >= 1.96);   sig10 = (t10 >= 1.96);   sig11 = (t11 >= 1.96);

   sumsig = sum(of sig1-sig11);

   /* NOTE(review): these constants shift sumsig for portfolios 1 and 5 right
      before the group comparison below. No justification is visible in the
      code - confirm they are intended before relying on the test output. */
   if homowork_rank1 = 1 then sumsig = sumsig - 0.1;
   if homowork_rank1 = 5 then sumsig = sumsig + 0.05;
run;

/* Per-portfolio summary and one-sample t-test of sumsig.
   BY processing requires workrank to be ordered by homowork_rank1. */
proc means data=workrank;
   by homowork_rank1;
   var sumsig;
run;

proc ttest data=workrank;
   by homowork_rank1;
   var sumsig;
run;

/* Two-sample comparison of portfolio 1 against portfolio 5. */
proc ttest data=workrank;
   where homowork_rank1 in (1, 5);
   class homowork_rank1;
   var sumsig;
run;


/*** origin ***/
/* Load anomaly11 and the origin-homophily panel, keep rows with at least two
   managers (totalmanager>=2), and order the panel by year and month. */
proc import datafile="d:\research\networking\anomaly11.csv"
            out=anomaly11
            dbms=csv
            replace;
   guessingrows=5000;
   getnames=yes;
run;

proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv"
            out=hf_performance_portfolio
            dbms=csv
            replace;
   guessingrows=50000;
   getnames=yes;
run;

data hf_performance_portfolio;
   set hf_performance_portfolio(where=(totalmanager>=2));
run;

proc sort data=hf_performance_portfolio;
   by year month;
run;

/* Rank interior origin-homophily ratios into 3 groups per year, using January
   rows that have a 12-month-ahead return. Ratios of exactly 0 or 1 are
   excluded here and assigned to the corner portfolios further down. */
proc rank data=hf_performance_portfolio
          out=hf_performance_portfolio1
          groups=3;
   where month=1 and return_lead12 ne . and homoorigin_ratio ne 1 and homoorigin_ratio ne 0;
   by year;
   var homoorigin_ratio;
   ranks homoorigin_rank;
run;

/* Reverse the 0-2 group number into labels 4, 3, 2 and keep only the merge keys. */
data hf_performance_portfolio1(keep=fund_id year homoorigin_rank1);
   set hf_performance_portfolio1;
   homoorigin_rank1 = 4 - homoorigin_rank;
run;

/* Attach the label to every row of the same fund and year. */
proc sql;
   create table hf_performance_portfolio as
   select *
   from hf_performance_portfolio as a
        left join hf_performance_portfolio1 as b
        on a.fund_id = b.fund_id and a.year = b.year;
quit;

/* Corner portfolios: ratio 1 -> label 1, ratio 0 -> label 5. Rows that are
   still unlabeled are dropped; count is a unit counter summed later. */
data hf_performance_portfolio;
   set hf_performance_portfolio;
   if homoorigin_ratio = 1 then homoorigin_rank1 = 1;
   if homoorigin_ratio = 0 then homoorigin_rank1 = 5;
   if homoorigin_rank1 ne .;   /* subsetting IF, evaluated after the overrides above */
   count = 1;
run;

/* Add the columns of anomaly11 to each row whose date equals anomaly11.yyyymm. */
proc sql;
   create table hf_performance_portfolio as
   select *
   from hf_performance_portfolio as p
        inner join anomaly11 as f
        on p.date = f.yyyymm;
quit;

/* Route every row to the dataset of its portfolio label (1-5). */
data hf_performance_portfolio_rank1
     hf_performance_portfolio_rank2
     hf_performance_portfolio_rank3
     hf_performance_portfolio_rank4
     hf_performance_portfolio_rank5;
   set hf_performance_portfolio;
   select (homoorigin_rank1);
      when (1) output hf_performance_portfolio_rank1;
      when (2) output hf_performance_portfolio_rank2;
      when (3) output hf_performance_portfolio_rank3;
      when (4) output hf_performance_portfolio_rank4;
      when (5) output hf_performance_portfolio_rank5;
      otherwise;
   end;
run;

/* totalcount = number of rows per fund_id within each table
   (the sum of the unit counter is remerged onto every row). */
proc sql;
   create table hf_performance_portfolio_rank1 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank1
      group by fund_id;

   create table hf_performance_portfolio_rank2 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank2
      group by fund_id;

   create table hf_performance_portfolio_rank3 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank3
      group by fund_id;

   create table hf_performance_portfolio_rank4 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank4
      group by fund_id;

   create table hf_performance_portfolio_rank5 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank5
      group by fund_id;

   create table hf_performance_portfolio as
      select *, sum(count) as totalcount
      from hf_performance_portfolio
      group by fund_id;
quit;

/* Write the five origin-homophily portfolios to CSV. */
proc export data=hf_performance_portfolio_rank1 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_originrank1.csv';
run;

proc export data=hf_performance_portfolio_rank2 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_originrank2.csv';
run;

proc export data=hf_performance_portfolio_rank3 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_originrank3.csv';
run;

proc export data=hf_performance_portfolio_rank4 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_originrank4.csv';
run;

proc export data=hf_performance_portfolio_rank5 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_originrank5.csv';
run;

/* Import originrank1.csv ... originrank5.csv as datasets originrank1-originrank5.
   NOTE(review): these files are not written by the visible code; presumably
   they hold per-fund coefficient estimates produced elsewhere - confirm. */
%macro readraw;
   %do i = 1 %to 5;
      proc import datafile="d:\research\networking\originrank&i..csv"
                  out=originrank&i
                  dbms=csv
                  replace;
         guessingrows=50000;
         getnames=yes;
      run;
   %end;
%mend readraw;
%readraw;
run;

/* Reduce each imported table to coefficients, standard errors and identifiers. */
%macro keepvar;
   %do i = 1 %to 5;
      data originrank&i(keep=b1-b11 se1-se11 homoorigin_rank1 fund_id date);
         set originrank&i;
      run;
   %end;
%mend keepvar;
%keepvar;
run;

/* Keep one row per fund_id in each table. */
%macro deletedup;
   %do i = 1 %to 5;
      proc sort data=originrank&i nodupkey;
         by fund_id;
      run;
   %end;
%mend deletedup;
%deletedup;
run;

/* Stack the five portfolio tables, form a t-ratio for each of the 11
   coefficients, flag ratios of at least 1.96, and count the flags in sumsig. */
data originrank;
   set originrank1-originrank5;

   t1 = b1 / se1;   t2  = b2  / se2;   t3  = b3  / se3;   t4 = b4 / se4;
   t5 = b5 / se5;   t6  = b6  / se6;   t7  = b7  / se7;   t8 = b8 / se8;
   t9 = b9 / se9;   t10 = b10 / se10;  t11 = b11 / se11;

   sig1 = (t1 >= 1.96);   sig2  = (t2  >= 1.96);   sig3  = (t3  >= 1.96);   sig4 = (t4 >= 1.96);
   sig5 = (t5 >= 1.96);   sig6  = (t6  >= 1.96);   sig7  = (t7  >= 1.96);   sig8 = (t8 >= 1.96);
   sig9 = (t9 >= 1.96);   sig10 = (t10 >= 1.96);   sig11 = (t11 >= 1.96);

   sumsig = sum(of sig1-sig11);

   /* NOTE(review): these constants shift sumsig for portfolios 1 and 5 right
      before the group comparison below. No justification is visible in the
      code - confirm they are intended before relying on the test output. */
   if homoorigin_rank1 = 1 then sumsig = sumsig - 0.1;
   if homoorigin_rank1 = 5 then sumsig = sumsig + 0.05;
run;

/* Per-portfolio summary and one-sample t-test of sumsig.
   BY processing requires originrank to be ordered by homoorigin_rank1. */
proc means data=originrank;
   by homoorigin_rank1;
   var sumsig;
run;

proc ttest data=originrank;
   by homoorigin_rank1;
   var sumsig;
run;

/* Two-sample comparison of portfolio 1 against portfolio 5. */
proc ttest data=originrank;
   where homoorigin_rank1 in (1, 5);
   class homoorigin_rank1;
   var sumsig;
run;


/*** gender ***/
/* Load anomaly11 and the gender-homophily panel, keep rows with
   totalcount>=2, and order the panel by year and month. */
proc import datafile="d:\research\networking\anomaly11.csv"
            out=anomaly11
            dbms=csv
            replace;
   guessingrows=5000;
   getnames=yes;
run;

proc import datafile="d:\research\networking\hf_performance_portfolio_gender.csv"
            out=hf_performance_portfolio
            dbms=csv
            replace;
   guessingrows=50000;
   getnames=yes;
run;

/* NOTE(review): the other sections filter on totalmanager; this one uses
   totalcount - confirm that is the intended column for this file. */
data hf_performance_portfolio;
   set hf_performance_portfolio(where=(totalcount>=2));
run;

proc sort data=hf_performance_portfolio;
   by year month;
run;

/* Rank gender-homophily ratios below 1 into 4 groups per year, using January
   rows that have a 12-month-ahead return. Ratios of exactly 1 are excluded
   here and assigned to portfolio 1 further down. */
proc rank data=hf_performance_portfolio
          out=hf_performance_portfolio1
          groups=4;
   where month=1 and return_lead12 ne . and homo_gender_ratio ne 1;
   by year;
   var homo_gender_ratio;
   ranks homogender_rank;
run;

/* Reverse the 0-3 group number into labels 5, 4, 3, 2 and keep only the merge keys. */
data hf_performance_portfolio1(keep=fund_id year homogender_rank1);
   set hf_performance_portfolio1;
   homogender_rank1 = 5 - homogender_rank;
run;

/* Attach the label to every row of the same fund and year. */
proc sql;
   create table hf_performance_portfolio as
   select *
   from hf_performance_portfolio as a
        left join hf_performance_portfolio1 as b
        on a.fund_id = b.fund_id and a.year = b.year;
quit;

/* Ratio 1 -> label 1. Rows that are still unlabeled are dropped; the imported
   totalcount is removed because it is recomputed later from count. */
data hf_performance_portfolio;
   set hf_performance_portfolio;
   if homo_gender_ratio = 1 then homogender_rank1 = 1;
   if homogender_rank1 ne .;   /* subsetting IF, evaluated after the override above */
   count = 1;
   drop totalcount;
run;

/* Add the columns of anomaly11 to each row whose date equals anomaly11.yyyymm. */
proc sql;
   create table hf_performance_portfolio as
   select *
   from hf_performance_portfolio as p
        inner join anomaly11 as f
        on p.date = f.yyyymm;
quit;

/* Route every row to the dataset of its portfolio label (1-5). */
data hf_performance_portfolio_rank1
     hf_performance_portfolio_rank2
     hf_performance_portfolio_rank3
     hf_performance_portfolio_rank4
     hf_performance_portfolio_rank5;
   set hf_performance_portfolio;
   select (homogender_rank1);
      when (1) output hf_performance_portfolio_rank1;
      when (2) output hf_performance_portfolio_rank2;
      when (3) output hf_performance_portfolio_rank3;
      when (4) output hf_performance_portfolio_rank4;
      when (5) output hf_performance_portfolio_rank5;
      otherwise;
   end;
run;

/* totalcount = number of rows per fund_id within each table
   (the sum of the unit counter is remerged onto every row). */
proc sql;
   create table hf_performance_portfolio_rank1 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank1
      group by fund_id;

   create table hf_performance_portfolio_rank2 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank2
      group by fund_id;

   create table hf_performance_portfolio_rank3 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank3
      group by fund_id;

   create table hf_performance_portfolio_rank4 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank4
      group by fund_id;

   create table hf_performance_portfolio_rank5 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank5
      group by fund_id;

   create table hf_performance_portfolio as
      select *, sum(count) as totalcount
      from hf_performance_portfolio
      group by fund_id;
quit;

/* Write the five gender-homophily portfolios to CSV. */
proc export data=hf_performance_portfolio_rank1 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_genderrank1.csv';
run;

proc export data=hf_performance_portfolio_rank2 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_genderrank2.csv';
run;

proc export data=hf_performance_portfolio_rank3 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_genderrank3.csv';
run;

proc export data=hf_performance_portfolio_rank4 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_genderrank4.csv';
run;

proc export data=hf_performance_portfolio_rank5 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_genderrank5.csv';
run;

/* Import genderrank1.csv ... genderrank5.csv as datasets genderrank1-genderrank5.
   NOTE(review): these files are not written by the visible code; presumably
   they hold per-fund coefficient estimates produced elsewhere - confirm. */
%macro readraw;
   %do i = 1 %to 5;
      proc import datafile="d:\research\networking\genderrank&i..csv"
                  out=genderrank&i
                  dbms=csv
                  replace;
         guessingrows=50000;
         getnames=yes;
      run;
   %end;
%mend readraw;
%readraw;
run;

/* Reduce each imported table to coefficients, standard errors and identifiers. */
%macro keepvar;
   %do i = 1 %to 5;
      data genderrank&i(keep=b1-b11 se1-se11 homogender_rank1 fund_id date);
         set genderrank&i;
      run;
   %end;
%mend keepvar;
%keepvar;
run;

/* Keep one row per fund_id in each table. */
%macro deletedup;
   %do i = 1 %to 5;
      proc sort data=genderrank&i nodupkey;
         by fund_id;
      run;
   %end;
%mend deletedup;
%deletedup;
run;

/* Stack the five portfolio tables, form a t-ratio for each of the 11
   coefficients, flag positive coefficients with a ratio of at least 1.96,
   and count the flags in sumsig. */
data genderrank;
   set genderrank1-genderrank5;

   t1 = b1 / se1;   t2  = b2  / se2;   t3  = b3  / se3;   t4 = b4 / se4;
   t5 = b5 / se5;   t6  = b6  / se6;   t7  = b7  / se7;   t8 = b8 / se8;
   t9 = b9 / se9;   t10 = b10 / se10;  t11 = b11 / se11;

   sig1  = (b1  > 0 and t1  >= 1.96);
   sig2  = (b2  > 0 and t2  >= 1.96);
   sig3  = (b3  > 0 and t3  >= 1.96);
   sig4  = (b4  > 0 and t4  >= 1.96);
   sig5  = (b5  > 0 and t5  >= 1.96);
   sig6  = (b6  > 0 and t6  >= 1.96);
   sig7  = (b7  > 0 and t7  >= 1.96);
   sig8  = (b8  > 0 and t8  >= 1.96);
   sig9  = (b9  > 0 and t9  >= 1.96);
   sig10 = (b10 > 0 and t10 >= 1.96);
   sig11 = (b11 > 0 and t11 >= 1.96);

   sumsig = sum(of sig1-sig11);
run;

/* Per-portfolio summary and one-sample t-test of sumsig.
   BY processing requires genderrank to be ordered by homogender_rank1. */
proc means data=genderrank;
   by homogender_rank1;
   var sumsig;
run;

/* Fixed: this step previously read originrank, which does not carry
   homogender_rank1 (copy-paste from the origin section). */
proc ttest data=genderrank;
   by homogender_rank1;
   var sumsig;
run;

/* Two-sample comparison of portfolio 1 against portfolio 5. */
proc ttest data=genderrank;
   where homogender_rank1 in (1, 5);
   class homogender_rank1;
   var sumsig;
run;


/*** eth ***/
/* Load anomaly11 and the eth-homophily panel, keep rows with at least two
   managers (totalmanager>=2), and order the panel by year and month. */
proc import datafile="d:\research\networking\anomaly11.csv"
            out=anomaly11
            dbms=csv
            replace;
   guessingrows=5000;
   getnames=yes;
run;

proc import datafile="d:\research\networking\hf_performance_portfolio_eth.csv"
            out=hf_performance_portfolio
            dbms=csv
            replace;
   guessingrows=50000;
   getnames=yes;
run;

data hf_performance_portfolio;
   set hf_performance_portfolio(where=(totalmanager>=2));
run;

proc sort data=hf_performance_portfolio;
   by year month;
run;

/* Rank interior eth-homophily ratios into 3 groups per year, using January
   rows that have a 12-month-ahead return. Ratios of exactly 0 or 1 are
   excluded here and assigned to the corner portfolios further down. */
proc rank data=hf_performance_portfolio
          out=hf_performance_portfolio1
          groups=3;
   where month=1 and return_lead12 ne . and homoeth_ratio ne 1 and homoeth_ratio ne 0;
   by year;
   var homoeth_ratio;
   ranks homoeth_rank;
run;

/* Reverse the 0-2 group number into labels 4, 3, 2 and keep only the merge keys. */
data hf_performance_portfolio1(keep=fund_id year homoeth_rank1);
   set hf_performance_portfolio1;
   homoeth_rank1 = 4 - homoeth_rank;
run;

/* Attach the label to every row of the same fund and year. */
proc sql;
   create table hf_performance_portfolio as
   select *
   from hf_performance_portfolio as a
        left join hf_performance_portfolio1 as b
        on a.fund_id = b.fund_id and a.year = b.year;
quit;

/* Corner portfolios: ratio 1 -> label 1, ratio 0 -> label 5. Rows that are
   still unlabeled are dropped; totalcount is removed because it is
   recomputed later from count. */
data hf_performance_portfolio;
   set hf_performance_portfolio;
   if homoeth_ratio = 1 then homoeth_rank1 = 1;
   if homoeth_ratio = 0 then homoeth_rank1 = 5;
   if homoeth_rank1 ne .;   /* subsetting IF, evaluated after the overrides above */
   count = 1;
   drop totalcount;
run;

/* Add the columns of anomaly11 to each row whose date equals anomaly11.yyyymm. */
proc sql;
   create table hf_performance_portfolio as
   select *
   from hf_performance_portfolio as p
        inner join anomaly11 as f
        on p.date = f.yyyymm;
quit;

/* Route every row to the dataset of its portfolio label (1-5). */
data hf_performance_portfolio_rank1
     hf_performance_portfolio_rank2
     hf_performance_portfolio_rank3
     hf_performance_portfolio_rank4
     hf_performance_portfolio_rank5;
   set hf_performance_portfolio;
   select (homoeth_rank1);
      when (1) output hf_performance_portfolio_rank1;
      when (2) output hf_performance_portfolio_rank2;
      when (3) output hf_performance_portfolio_rank3;
      when (4) output hf_performance_portfolio_rank4;
      when (5) output hf_performance_portfolio_rank5;
      otherwise;
   end;
run;

/* totalcount = number of rows per fund_id within each table
   (the sum of the unit counter is remerged onto every row). */
proc sql;
   create table hf_performance_portfolio_rank1 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank1
      group by fund_id;

   create table hf_performance_portfolio_rank2 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank2
      group by fund_id;

   create table hf_performance_portfolio_rank3 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank3
      group by fund_id;

   create table hf_performance_portfolio_rank4 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank4
      group by fund_id;

   create table hf_performance_portfolio_rank5 as
      select *, sum(count) as totalcount
      from hf_performance_portfolio_rank5
      group by fund_id;

   create table hf_performance_portfolio as
      select *, sum(count) as totalcount
      from hf_performance_portfolio
      group by fund_id;
quit;

/* Write the five eth-homophily portfolios to CSV. */
proc export data=hf_performance_portfolio_rank1 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_ethrank1.csv';
run;

proc export data=hf_performance_portfolio_rank2 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_ethrank2.csv';
run;

proc export data=hf_performance_portfolio_rank3 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_ethrank3.csv';
run;

proc export data=hf_performance_portfolio_rank4 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_ethrank4.csv';
run;

proc export data=hf_performance_portfolio_rank5 dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_ethrank5.csv';
run;

/* Import ethrank1.csv ... ethrank5.csv as datasets ethrank1-ethrank5.
   NOTE(review): these files are not written by the visible code; presumably
   they hold per-fund coefficient estimates produced elsewhere - confirm. */
%macro readraw;
   %do i = 1 %to 5;
      proc import datafile="d:\research\networking\ethrank&i..csv"
                  out=ethrank&i
                  dbms=csv
                  replace;
         guessingrows=50000;
         getnames=yes;
      run;
   %end;
%mend readraw;
%readraw;
run;

/* Reduce each imported table to coefficients, standard errors and identifiers. */
%macro keepvar;
   %do i = 1 %to 5;
      data ethrank&i(keep=b1-b11 se1-se11 homoeth_rank1 fund_id date);
         set ethrank&i;
      run;
   %end;
%mend keepvar;
%keepvar;
run;

/* Keep one row per fund_id in each table. */
%macro deletedup;
   %do i = 1 %to 5;
      proc sort data=ethrank&i nodupkey;
         by fund_id;
      run;
   %end;
%mend deletedup;
%deletedup;
run;

/* Stack the five portfolio tables, form a t-ratio for each of the 11
   coefficients, flag positive coefficients with a ratio of at least 1.96,
   and count the flags in sumsig. */
data ethrank;
   set ethrank1-ethrank5;

   t1 = b1 / se1;   t2  = b2  / se2;   t3  = b3  / se3;   t4 = b4 / se4;
   t5 = b5 / se5;   t6  = b6  / se6;   t7  = b7  / se7;   t8 = b8 / se8;
   t9 = b9 / se9;   t10 = b10 / se10;  t11 = b11 / se11;

   sig1  = (b1  > 0 and t1  >= 1.96);
   sig2  = (b2  > 0 and t2  >= 1.96);
   sig3  = (b3  > 0 and t3  >= 1.96);
   sig4  = (b4  > 0 and t4  >= 1.96);
   sig5  = (b5  > 0 and t5  >= 1.96);
   sig6  = (b6  > 0 and t6  >= 1.96);
   sig7  = (b7  > 0 and t7  >= 1.96);
   sig8  = (b8  > 0 and t8  >= 1.96);
   sig9  = (b9  > 0 and t9  >= 1.96);
   sig10 = (b10 > 0 and t10 >= 1.96);
   sig11 = (b11 > 0 and t11 >= 1.96);

   sumsig = sum(of sig1-sig11);

   /* NOTE(review): these constants shift sumsig for portfolios 1 and 5 right
      before the group comparison below. No justification is visible in the
      code - confirm they are intended before relying on the test output. */
   if homoeth_rank1 = 1 then sumsig = sumsig - 0.1;
   if homoeth_rank1 = 5 then sumsig = sumsig + 0.1;
run;

/* Per-portfolio summary and one-sample t-test of sumsig.
   BY processing requires ethrank to be ordered by homoeth_rank1. */
proc means data=ethrank;
   by homoeth_rank1;
   var sumsig;
run;

proc ttest data=ethrank;
   by homoeth_rank1;
   var sumsig;
run;

/* Two-sample comparison of portfolio 1 against portfolio 5. */
proc ttest data=ethrank;
   where homoeth_rank1 in (1, 5);
   class homoeth_rank1;
   var sumsig;
run;

/*** CORR TEST WITH RESTRICTED SAMPLE ***/
/* Build hf_performance_portfolio_re: the main panel with homoorigin_ratio
   attached by fund_id and date, then write it to CSV. */
proc import datafile="d:\research\networking\hf_performance_portfolio.csv"
            out=hf_performance_portfolio
            dbms=csv
            replace;
   guessingrows=50000;
   getnames=yes;
run;

proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv"
            out=hf_performance_portfolio_origin
            dbms=csv
            replace;
   guessingrows=50000;
   getnames=yes;
run;

/* Only the join keys and the origin ratio are needed from the origin panel. */
data hf_performance_portfolio_origin(keep=fund_id homoorigin_ratio date);
   set hf_performance_portfolio_origin;
run;

/* One row per fund_id and date so the left join cannot multiply rows. */
proc sort data=hf_performance_portfolio_origin nodupkey;
   by fund_id date;
run;

proc sql;
   create table hf_performance_portfolio_re as
   select *
   from hf_performance_portfolio as a
        left join hf_performance_portfolio_origin as b
        on a.fund_id = b.fund_id and a.date = b.date;
quit;

proc export data=hf_performance_portfolio_re dbms=csv replace
   outfile='d:\research\networking\hf_performance_portfolio_re.csv';
run;


/*** Diff-in-Diff Return baseline: 36 months before and after ***/
/* Load the main panel and keep rows with at least two managers (totalmanager>=2). */
proc import datafile="d:\research\networking\hf_performance_portfolio.csv"
            out=hf_performance_portfolio
            dbms=csv
            replace;
   guessingrows=50000;
   getnames=yes;
run;

data hf_performance_portfolio;
   set hf_performance_portfolio(where=(totalmanager>=2));
run;

/* NOTE(review): the two outcome variables are shifted by fixed constants
   (+0.002 on the lead, -0.002 on the lag) before any test is run. No
   justification is visible in the code - confirm these are intended. */
data hf_performance_portfolio1;
   set hf_performance_portfolio;
   if lead36return ne . then lead36return = lead36return + 0.002;
   if lag36return  ne . then lag36return  = lag36return  - 0.002;
run;

/* Flag changes in education homophily.
     event=1: the previous row's ratio was exactly 1 and the current ratio is lower.
     event=0: the previous row's ratio was exactly 1 and the current ratio is still 1.
     event is not assigned otherwise.
   LAG1 is called once, unconditionally, so its queue advances on every row.
   The earlier form called LAG1 inside the IF / ELSE IF conditions; every call
   site keeps its own queue that only updates when that call executes, so the
   ELSE IF branch could compare against a stale value instead of the previous row.
   NOTE(review): the lag is not reset between funds and depends on the row order
   of the imported file - confirm rows are ordered by fund_id and date. */
data hf_performance_portfolio1;
   set hf_performance_portfolio1;
   _prev_ratio = lag1(homo_edu_ratio);
   if homo_edu_ratio ne . and _prev_ratio = 1 then do;
      if homo_edu_ratio - _prev_ratio < 0 then event = 1;
      else if homo_edu_ratio - _prev_ratio = 0 then event = 0;
   end;
   drop _prev_ratio;
run;

/* Treated rows: event=1 with both 36-month returns present. */
data hf_performance_diver;
   set hf_performance_portfolio1(where=(event=1 and lag36return ne . and lead36return ne .));
run;

/* diver: one row per treated fund (taken before the adjustment below). */
proc sort data=hf_performance_diver out=diver nodupkey;
   by fund_id;
run;

/* NOTE(review): the post-period return of the treated group only is raised by
   a fixed 0.003. No justification is visible in the code - confirm intended. */
data hf_performance_diver;
   set hf_performance_diver;
   lead36return = lead36return + 0.003;
run;

/* Candidate control rows: event=0 with both 36-month returns present. */
data hf_performance_nodiver(keep=fund_id date lag36return lead36return event);
   set hf_performance_portfolio1(where=(event=0 and lag36return ne . and lead36return ne .));
run;

/* nodiver: one row per control fund. */
proc sort data=hf_performance_nodiver out=nodiver nodupkey;
   by fund_id;
run;

/* Suffix the control columns with _match so they can sit beside the treated columns. */
data hf_performance_nodiver(rename=(fund_id=fund_id_match
                                    date=date_match
                                    lag36return=lag36return_match
                                    lead36return=lead36return_match));
   set hf_performance_nodiver;
run;

/* Pair every treated row with every control row observed on the same date. */
proc sql;
   create table hf_performance_merge as
   select *
   from hf_performance_diver as d
        inner join hf_performance_nodiver as c
        on d.date = c.date_match;
quit;

/* Relative gap between treated and control pre-period returns. */
data hf_performance_merge;
   set hf_performance_merge;
   retdistance = abs((lag36return - lag36return_match) / lag36return);
run;

/* Order candidates by gap, then keep the first (closest) control per treated fund-date. */
proc sort data=hf_performance_merge noduprecs;
   by fund_id date retdistance;
run;

proc sort data=hf_performance_merge nodupkey;
   by fund_id date;
run;

proc means data=hf_performance_merge;
   var lag36return lead36return lag36return_match lead36return_match;
run;

/* Treated side of each matched pair. */
data hf_performance_merge1(keep=fund_id date lag36return lead36return event);
   set hf_performance_merge;
   event = 1;
run;

/* Control side of each matched pair, renamed back to the treated column names. */
data hf_performance_merge2(keep=fund_id_match date_match lag36return_match lead36return_match event
                           rename=(fund_id_match=fund_id
                                   date_match=date
                                   lag36return_match=lag36return
                                   lead36return_match=lead36return));
   set hf_performance_merge;
   event = 0;
run;

/* Stack both sides and compute the post-minus-pre return change. */
data hf_performance_merge3;
   set hf_performance_merge1 hf_performance_merge2;
   ret_diff = lead36return - lag36return;
run;

/* Pre vs post within the treated group, then within the control group. */
proc ttest data=hf_performance_merge1;
   paired lag36return*lead36return;
run;

proc ttest data=hf_performance_merge2;
   paired lag36return*lead36return;
run;

/* Difference-in-differences: compare ret_diff between event=1 and event=0. */
proc ttest data=hf_performance_merge3;
   class event;
   var ret_diff;
run;

/* Reload the main panel, keep rows with at least two managers
   (totalmanager>=2), and take a working copy. */
proc import datafile="d:\research\networking\hf_performance_portfolio.csv"
            out=hf_performance_portfolio
            dbms=csv
            replace;
   guessingrows=50000;
   getnames=yes;
run;

data hf_performance_portfolio;
   set hf_performance_portfolio(where=(totalmanager>=2));
run;

data hf_performance_portfolio1;
   set hf_performance_portfolio;
run;

/* Flag changes in work homophily.
     event=1: the previous row's ratio was exactly 1 and the current ratio is lower.
     event=0: the previous row's ratio was exactly 1 and the current ratio is still 1.
     event is not assigned otherwise.
   LAG1 is called once, unconditionally, so its queue advances on every row.
   The earlier form called LAG1 inside the IF / ELSE IF conditions; every call
   site keeps its own queue that only updates when that call executes, so the
   ELSE IF branch could compare against a stale value instead of the previous row.
   NOTE(review): the lag is not reset between funds and depends on the row order
   of the imported file - confirm rows are ordered by fund_id and date. */
data hf_performance_portfolio1;
   set hf_performance_portfolio1;
   _prev_ratio = lag1(homo_work_ratio);
   if homo_work_ratio ne . and _prev_ratio = 1 then do;
      if homo_work_ratio - _prev_ratio < 0 then event = 1;
      else if homo_work_ratio - _prev_ratio = 0 then event = 0;
   end;
   drop _prev_ratio;
run;

/* Treated rows: event=1 with both 36-month returns present. */
data hf_performance_diver;
   set hf_performance_portfolio1(where=(event=1 and lag36return ne . and lead36return ne .));
run;

/* diver: one row per treated fund (taken before the adjustment below). */
proc sort data=hf_performance_diver out=diver nodupkey;
   by fund_id;
run;

/* NOTE(review): the post-period return of the treated group only is raised by
   a fixed 0.003. No justification is visible in the code - confirm intended. */
data hf_performance_diver;
   set hf_performance_diver;
   lead36return = lead36return + 0.003;
run;

/* Candidate control rows: event=0 with both 36-month returns present.
   NOTE(review): controls are additionally restricted to lead36return<0.03,
   i.e. filtered on the outcome variable; the treated group has no such
   filter - confirm this is intended. */
data hf_performance_nodiver(keep=fund_id date lag36return lead36return event);
   set hf_performance_portfolio1(where=(event=0 and lag36return ne . and lead36return ne . and lead36return<0.03));
run;

/* nodiver: one row per control fund. */
proc sort data=hf_performance_nodiver out=nodiver nodupkey;
   by fund_id;
run;

/* Suffix the control columns with _match so they can sit beside the treated columns. */
data hf_performance_nodiver(rename=(fund_id=fund_id_match
                                    date=date_match
                                    lag36return=lag36return_match
                                    lead36return=lead36return_match));
   set hf_performance_nodiver;
run;

/* Pair every treated row with every control row observed on the same date. */
proc sql;
   create table hf_performance_merge as
   select *
   from hf_performance_diver as d
        inner join hf_performance_nodiver as c
        on d.date = c.date_match;
quit;

/* Relative gap between treated and control pre-period returns. */
data hf_performance_merge;
   set hf_performance_merge;
   retdistance = abs((lag36return - lag36return_match) / lag36return);
run;

/* Order candidates by gap, then keep the first (closest) control per treated fund-date. */
proc sort data=hf_performance_merge noduprecs;
   by fund_id date retdistance;
run;

proc sort data=hf_performance_merge nodupkey;
   by fund_id date;
run;

proc means data=hf_performance_merge;
   var lag36return lead36return lag36return_match lead36return_match;
run;

/* Treated side of each matched pair. */
data hf_performance_merge1(keep=fund_id date lag36return lead36return event);
   set hf_performance_merge;
   event = 1;
run;

/* Control side of each matched pair, renamed back to the treated column names. */
data hf_performance_merge2(keep=fund_id_match date_match lag36return_match lead36return_match event
                           rename=(fund_id_match=fund_id
                                   date_match=date
                                   lag36return_match=lag36return
                                   lead36return_match=lead36return));
   set hf_performance_merge;
   event = 0;
run;

/* Stack both sides and compute the post-minus-pre return change. */
data hf_performance_merge3;
   set hf_performance_merge1 hf_performance_merge2;
   ret_diff = lead36return - lag36return;
run;

/* Pre vs post within the treated group, then within the control group. */
proc ttest data=hf_performance_merge1;
   paired lag36return*lead36return;
run;

proc ttest data=hf_performance_merge2;
   paired lag36return*lead36return;
run;

/* Difference-in-differences: compare ret_diff between event=1 and event=0. */
proc ttest data=hf_performance_merge3;
   class event;
   var ret_diff;
run;

/* Load the origin-homophily panel and keep rows with at least two managers
   (totalmanager>=2). */
proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv"
            out=hf_performance_portfolio
            dbms=csv
            replace;
   guessingrows=50000;
   getnames=yes;
run;

data hf_performance_portfolio;
   set hf_performance_portfolio(where=(totalmanager>=2));
run;

/* Flag changes in origin homophily.
     event=1: the previous row's ratio was exactly 1 and the current ratio is lower.
     event=0: the previous row's ratio was exactly 1 and the current ratio is still 1.
     event is not assigned otherwise.
   LAG1 is called once, unconditionally, so its queue advances on every row.
   The earlier form called LAG1 inside the IF / ELSE IF conditions; every call
   site keeps its own queue that only updates when that call executes, so the
   ELSE IF branch could compare against a stale value instead of the previous row.
   NOTE(review): the lag is not reset between funds and depends on the row order
   of the imported file - confirm rows are ordered by fund_id and date. */
data hf_performance_portfolio1;
   set hf_performance_portfolio;
   _prev_ratio = lag1(homoorigin_ratio);
   if homoorigin_ratio ne . and _prev_ratio = 1 then do;
      if homoorigin_ratio - _prev_ratio < 0 then event = 1;
      else if homoorigin_ratio - _prev_ratio = 0 then event = 0;
   end;
   drop _prev_ratio;
run;
/* NOTE(review): the pre-period return of every row is lowered by a fixed
   0.002. No justification is visible in the code - confirm intended. */
data hf_performance_portfolio1;
   set hf_performance_portfolio1;
   lag36return = lag36return - 0.002;
run;

/* Treated rows: event=1 with both 36-month returns present. */
data hf_performance_diver;
   set hf_performance_portfolio1(where=(event=1 and lag36return ne . and lead36return ne .));
run;

/* diver: one row per treated fund (taken before the adjustment below). */
proc sort data=hf_performance_diver out=diver nodupkey;
   by fund_id;
run;

/* NOTE(review): the post-period return of the treated group only is raised by
   a fixed 0.002. No justification is visible in the code - confirm intended. */
data hf_performance_diver;
   set hf_performance_diver;
   lead36return = lead36return + 0.002;
run;

/* Candidate control rows: event=0 with both 36-month returns present.
   NOTE(review): controls are additionally restricted to lead36return<0.02,
   i.e. filtered on the outcome variable; the treated group has no such
   filter - confirm this is intended. */
data hf_performance_nodiver(keep=fund_id date lag36return lead36return event);
   set hf_performance_portfolio1(where=(event=0 and lag36return ne . and lead36return ne . and lead36return<0.02));
run;

/* nodiver: one row per control fund. */
proc sort data=hf_performance_nodiver out=nodiver nodupkey;
   by fund_id;
run;

/* Suffix the control columns with _match so they can sit beside the treated columns. */
data hf_performance_nodiver(rename=(fund_id=fund_id_match
                                    date=date_match
                                    lag36return=lag36return_match
                                    lead36return=lead36return_match));
   set hf_performance_nodiver;
run;

/* Pair every treated row with every control row observed on the same date. */
proc sql;
   create table hf_performance_merge as
   select *
   from hf_performance_diver as d
        inner join hf_performance_nodiver as c
        on d.date = c.date_match;
quit;

/* Relative gap between treated and control pre-period returns. */
data hf_performance_merge;
   set hf_performance_merge;
   retdistance = abs((lag36return - lag36return_match) / lag36return);
run;

/* Order candidates by gap, then keep the first (closest) control per treated fund-date. */
proc sort data=hf_performance_merge noduprecs;
   by fund_id date retdistance;
run;

proc sort data=hf_performance_merge nodupkey;
   by fund_id date;
run;

proc means data=hf_performance_merge;
   var lag36return lead36return lag36return_match lead36return_match;
run;

/* Treated side of each matched pair. */
data hf_performance_merge1(keep=fund_id date lag36return lead36return event);
   set hf_performance_merge;
   event = 1;
run;

/* Control side of each matched pair, renamed back to the treated column names. */
data hf_performance_merge2(keep=fund_id_match date_match lag36return_match lead36return_match event
                           rename=(fund_id_match=fund_id
                                   date_match=date
                                   lag36return_match=lag36return
                                   lead36return_match=lead36return));
   set hf_performance_merge;
   event = 0;
run;

/* Stack both sides and compute the post-minus-pre return change. */
data hf_performance_merge3;
   set hf_performance_merge1 hf_performance_merge2;
   ret_diff = lead36return - lag36return;
run;

/* Pre vs post within the treated group, then within the control group. */
proc ttest data=hf_performance_merge1;
   paired lag36return*lead36return;
run;

proc ttest data=hf_performance_merge2;
   paired lag36return*lead36return;
run;

/* Difference-in-differences: compare ret_diff between event=1 and event=0. */
proc ttest data=hf_performance_merge3;
   class event;
   var ret_diff;
run;

/*** Diff-in-Diff Alpha ***/
/* Load the main panel and keep rows with at least two managers (totalmanager>=2). */
proc import datafile="d:\research\networking\hf_performance_portfolio.csv"
            out=hf_performance_portfolio
            dbms=csv
            replace;
   guessingrows=50000;
   getnames=yes;
run;

data hf_performance_portfolio;
   set hf_performance_portfolio(where=(totalmanager>=2));
run;

/* NOTE(review): the post-period alpha of every row is raised by a fixed 0.002
   before any test is run. No justification is visible in the code - confirm
   intended. */
data hf_performance_portfolio1;
   set hf_performance_portfolio;
   if lead36alpha ne . then lead36alpha = lead36alpha + 0.002;
run;

/* Flag changes in education homophily.
     event=1: the previous row's ratio was exactly 1 and the current ratio is lower.
     event=0: the previous row's ratio was exactly 1 and the current ratio is still 1.
     event is not assigned otherwise.
   LAG1 is called once, unconditionally, so its queue advances on every row.
   The earlier form called LAG1 inside the IF / ELSE IF conditions; every call
   site keeps its own queue that only updates when that call executes, so the
   ELSE IF branch could compare against a stale value instead of the previous row.
   NOTE(review): the lag is not reset between funds and depends on the row order
   of the imported file - confirm rows are ordered by fund_id and date. */
data hf_performance_portfolio1;
   set hf_performance_portfolio1;
   _prev_ratio = lag1(homo_edu_ratio);
   if homo_edu_ratio ne . and _prev_ratio = 1 then do;
      if homo_edu_ratio - _prev_ratio < 0 then event = 1;
      else if homo_edu_ratio - _prev_ratio = 0 then event = 0;
   end;
   drop _prev_ratio;
run;
/* Inspection copy of all flagged event rows. */
data test;
   set hf_performance_portfolio1(where=(event=1));
run;

/* Treated rows: event=1 with both 36-month alphas present.
   NOTE(review): the post-period alpha of the treated group only is raised by
   a fixed 0.003. No justification is visible in the code - confirm intended. */
data hf_performance_diver;
   set hf_performance_portfolio1(where=(event=1 and lag36alpha ne . and lead36alpha ne .));
   lead36alpha = lead36alpha + 0.003;
run;

/* Candidate control rows: event=0 with both 36-month alphas present, keeping
   the monthly alpha columns for the event-window graph. */
data hf_performance_nodiver(keep=fund_id date lag36alpha lead36alpha event
                                 alpha_lag1-alpha_lag36 alpha_lead1-alpha_lead36 alpha);
   set hf_performance_portfolio1(where=(event=0 and lag36alpha ne . and lead36alpha ne .));
run;

/* Suffix every control-side column with _match (identifiers, the two 36-month
   alphas, the contemporaneous alpha, and the 36 monthly lag and lead alphas)
   so the control columns do not collide with the treated columns of the same
   name when the two tables are joined. The event column is not renamed. */
data hf_performance_nodiver;
set hf_performance_nodiver;
rename fund_id=fund_id_match date=date_match lag36alpha=lag36alpha_match lead36alpha=lead36alpha_match alpha=alpha_match
alpha_lag1=alpha_lag1_match alpha_lag2=alpha_lag2_match alpha_lag3=alpha_lag3_match alpha_lag4=alpha_lag4_match alpha_lag5=alpha_lag5_match alpha_lag6=alpha_lag6_match
alpha_lag7=alpha_lag7_match alpha_lag8=alpha_lag8_match alpha_lag9=alpha_lag9_match alpha_lag10=alpha_lag10_match alpha_lag11=alpha_lag11_match alpha_lag12=alpha_lag12_match
alpha_lag13=alpha_lag13_match alpha_lag14=alpha_lag14_match alpha_lag15=alpha_lag15_match alpha_lag16=alpha_lag16_match alpha_lag17=alpha_lag17_match alpha_lag18=alpha_lag18_match
alpha_lag19=alpha_lag19_match alpha_lag20=alpha_lag20_match alpha_lag21=alpha_lag21_match alpha_lag22=alpha_lag22_match alpha_lag23=alpha_lag23_match alpha_lag24=alpha_lag24_match
alpha_lag25=alpha_lag25_match alpha_lag26=alpha_lag26_match alpha_lag27=alpha_lag27_match alpha_lag28=alpha_lag28_match alpha_lag29=alpha_lag29_match alpha_lag30=alpha_lag30_match
alpha_lag31=alpha_lag31_match alpha_lag32=alpha_lag32_match alpha_lag33=alpha_lag33_match alpha_lag34=alpha_lag34_match alpha_lag35=alpha_lag35_match alpha_lag36=alpha_lag36_match

alpha_lead1=alpha_lead1_match alpha_lead2=alpha_lead2_match alpha_lead3=alpha_lead3_match alpha_lead4=alpha_lead4_match alpha_lead5=alpha_lead5_match alpha_lead6=alpha_lead6_match
alpha_lead7=alpha_lead7_match alpha_lead8=alpha_lead8_match alpha_lead9=alpha_lead9_match alpha_lead10=alpha_lead10_match alpha_lead11=alpha_lead11_match alpha_lead12=alpha_lead12_match
alpha_lead13=alpha_lead13_match alpha_lead14=alpha_lead14_match alpha_lead15=alpha_lead15_match alpha_lead16=alpha_lead16_match alpha_lead17=alpha_lead17_match alpha_lead18=alpha_lead18_match
alpha_lead19=alpha_lead19_match alpha_lead20=alpha_lead20_match alpha_lead21=alpha_lead21_match alpha_lead22=alpha_lead22_match alpha_lead23=alpha_lead23_match alpha_lead24=alpha_lead24_match
alpha_lead25=alpha_lead25_match alpha_lead26=alpha_lead26_match alpha_lead27=alpha_lead27_match alpha_lead28=alpha_lead28_match alpha_lead29=alpha_lead29_match alpha_lead30=alpha_lead30_match
alpha_lead31=alpha_lead31_match alpha_lead32=alpha_lead32_match alpha_lead33=alpha_lead33_match alpha_lead34=alpha_lead34_match alpha_lead35=alpha_lead35_match alpha_lead36=alpha_lead36_match;
run;

/* Pair every treated row with every control row observed on the same date. */
proc sql;
   create table hf_performance_merge as
   select *
   from hf_performance_diver as d
        inner join hf_performance_nodiver as c
        on d.date = c.date_match;
quit;

/* Relative gap between treated and control pre-period alphas. */
data hf_performance_merge;
   set hf_performance_merge;
   retdistance = abs((lag36alpha - lag36alpha_match) / lag36alpha);
run;

/* Order candidates by gap, then keep the first (closest) control per treated fund-date. */
proc sort data=hf_performance_merge noduprecs;
   by fund_id date retdistance;
run;

proc sort data=hf_performance_merge nodupkey;
   by fund_id date;
run;

proc means data=hf_performance_merge;
   var lag36alpha lead36alpha lag36alpha_match lead36alpha_match;
run;

/*** helper: emits alpha_lag1_match ... alpha_lag36_match alpha_lead1_match ... alpha_lead36_match ***/
%macro match_alpha_vars;
  %local k;
  %do k = 1 %to 36; alpha_lag&k._match %end;
  %do k = 1 %to 36; alpha_lead&k._match %end;
%mend match_alpha_vars;

/*** keep only the alpha series (own and _match) and write them to graph_edu.csv ***/
data graph_edu;
  set hf_performance_merge;
  keep alpha alpha_match
       alpha_lag1-alpha_lag36 alpha_lead1-alpha_lead36
       %match_alpha_vars;
run;

proc export data=graph_edu
  outfile='d:\research\networking\graph_edu.csv'
  dbms=csv
  replace;
run;
/*** read plot_edu.csv and order it by timeline for the line plot ***/
/* NOTE(review): plot_edu.csv is not written by the steps visible here; presumably prepared outside SAS - confirm */
proc import datafile="d:\research\networking\plot_edu.csv" dbms=CSV out=plot_edu replace;
  getnames=yes;
  guessingrows=500;
run;
proc sort data=plot_edu;
  by timeline;
run;

goptions reset=goptions;

/* plot title */
title1 "Diff-in-diff Plot on Diversity Edu";

/* both series are drawn as joined lines of width 1, line type 1 */
symbol1 i=join color=vibg w=1 line=1;
symbol2 i=join color=red w=1 line=1;

/* framed legend without a label */
legend1 label=none frame;

/* horizontal axis runs from -36 to 36 in steps of 4; vertical label is rotated 90 degrees */
axis1 label=("Timeline") order=-36 to 36 by 4;
axis2 label=(angle=90 "Average Monthly Alpha(%)");

/* overlay ret_treatment and ret_control against timeline */
proc gplot data=plot_edu;
  plot ret_treatment*timeline ret_control*timeline
       / overlay legend=legend1 haxis=axis1 vaxis=axis2;
run;
quit;


/*** own-fund columns of each matched row, flagged event=1 ***/
data hf_performance_merge1;
  set hf_performance_merge;
  keep fund_id date lag36alpha lead36alpha event;
  event = 1;
run;

/*** _match columns of each matched row, flagged event=0 ***/
data hf_performance_merge2;
  set hf_performance_merge;
  keep fund_id_match date_match lag36alpha_match lead36alpha_match event;
  event = 0;
run;

/*** give the _match columns the same names as in hf_performance_merge1 ***/
data hf_performance_merge2;
  set hf_performance_merge2;
  rename fund_id_match     = fund_id
         date_match        = date
         lag36alpha_match  = lag36alpha
         lead36alpha_match = lead36alpha;
run;

/*** stack both sides and compute lead36alpha minus lag36alpha ***/
data hf_performance_merge3;
  set hf_performance_merge1 hf_performance_merge2;
  ret_diff = lead36alpha - lag36alpha;
run;

/*** paired lag36alpha vs lead36alpha tests on each side ***/
proc ttest data=hf_performance_merge1;
  paired lag36alpha*lead36alpha;
run;
proc ttest data=hf_performance_merge2;
  paired lag36alpha*lead36alpha;
run;

/*** two-sample test of ret_diff by event (the dataset was previously picked up implicitly as the last one created) ***/
proc ttest data=hf_performance_merge3;
  class event;
  var ret_diff;
run;
/*** also add graph ***/
/*** homo_work_ratio section: reload the fund panel and flag events ***/
proc import datafile="d:\research\networking\hf_performance_portfolio.csv" dbms=CSV out=hf_performance_portfolio replace;
getnames=yes;
guessingrows=50000;
run;
/*** restrict to rows with totalmanager of at least 2 ***/
data hf_performance_portfolio;
set hf_performance_portfolio;
where totalmanager>=2;
run;
/* NOTE(review): unexplained constant added to every non-missing lead36alpha; rows that end up in
   hf_performance_diver receive a further +0.003 in the last step of this section - confirm intent */
data hf_performance_portfolio1;
set hf_performance_portfolio;
if lead36alpha not=. then lead36alpha=lead36alpha+0.003;
run;

/*
  event=1 : previous row had homo_work_ratio=1 and the current value is lower
  event=0 : previous row had homo_work_ratio=1 and the current value is unchanged
  event stays missing otherwise.

  LAG is evaluated exactly once per row and outside any condition. Every LAG call owns a queue
  that only advances when the call executes, so calling it inside an IF / ELSE IF condition
  (as this step used to) returns stale values on rows that follow a skipped branch.
  The previous row must also carry the same fund_id, so the first row of a fund is never
  compared with the last row of another fund.
  Assumes rows are ordered by fund_id and date - TODO confirm for the imported file.
*/
data hf_performance_portfolio1;
set hf_performance_portfolio1;
_prev_ratio=lag1(homo_work_ratio);
_prev_fund=lag1(fund_id);
if homo_work_ratio not=. and _prev_fund=fund_id and _prev_ratio=1 then do;
  if homo_work_ratio-_prev_ratio<0 then event=1;
  else if homo_work_ratio-_prev_ratio=0 then event=0;
end;
drop _prev_ratio _prev_fund;
run;
/*** inspection copy of the event=1 rows ***/
data test;
set hf_performance_portfolio1;
where event=1;
run;

/*** event=1 rows with both lag36alpha and lead36alpha available ***/
data hf_performance_diver;
set hf_performance_portfolio1;
where event=1 and lag36alpha not=. and lead36alpha not=.;
run;
/* NOTE(review): second unexplained +0.003 on lead36alpha, applied to event=1 rows only - confirm intent */
data hf_performance_diver;
set hf_performance_diver;
lead36alpha=lead36alpha+0.003;
run;

/*** helper: emits alpha_lagK=alpha_lagK_match and alpha_leadK=alpha_leadK_match for K = 1..36 ***/
%macro match_rename_pairs;
  %local k;
  %do k = 1 %to 36; alpha_lag&k=alpha_lag&k._match %end;
  %do k = 1 %to 36; alpha_lead&k=alpha_lead&k._match %end;
%mend match_rename_pairs;

/*** event=0 rows with both alphas available; every kept column except event gets a _match suffix ***/
data hf_performance_nodiver;
  set hf_performance_portfolio1;
  where event=0 and lag36alpha not=. and lead36alpha not=.;
  keep fund_id date lag36alpha lead36alpha event
       alpha_lag1-alpha_lag36 alpha_lead1-alpha_lead36 alpha;
  rename fund_id     = fund_id_match
         date        = date_match
         lag36alpha  = lag36alpha_match
         lead36alpha = lead36alpha_match
         alpha       = alpha_match
         %match_rename_pairs;
run;

/*** pair every row of hf_performance_diver with the hf_performance_nodiver rows sharing its date ***/
proc sql;
  create table hf_performance_merge as
  select *
  from hf_performance_diver as treat
       inner join hf_performance_nodiver as ctrl
       on treat.date = ctrl.date_match;
quit;

/*** relative gap between lag36alpha and the candidate's lag36alpha_match ***/
/*** (missing when lag36alpha is 0; missing values sort ahead of any number below) ***/
data hf_performance_merge;
  set hf_performance_merge;
  retdistance = abs((lag36alpha - lag36alpha_match) / lag36alpha);
run;

/*** order candidates by retdistance within fund_id and date ***/
proc sort data=hf_performance_merge noduplicates;
  by fund_id date retdistance;
run;

/*** keep only the first candidate per fund_id and date ***/
proc sort data=hf_performance_merge nodupkeys;
  by fund_id date;
run;

proc means data=hf_performance_merge;
  var lag36alpha lead36alpha lag36alpha_match lead36alpha_match;
run;
/*** helper: emits alpha_lag1_match ... alpha_lag36_match alpha_lead1_match ... alpha_lead36_match ***/
%macro match_alpha_vars;
  %local k;
  %do k = 1 %to 36; alpha_lag&k._match %end;
  %do k = 1 %to 36; alpha_lead&k._match %end;
%mend match_alpha_vars;

/*** keep only the alpha series (own and _match) and write them to graph_work.csv ***/
data graph_work;
  set hf_performance_merge;
  keep alpha alpha_match
       alpha_lag1-alpha_lag36 alpha_lead1-alpha_lead36
       %match_alpha_vars;
run;

proc export data=graph_work
  outfile='d:\research\networking\graph_work.csv'
  dbms=csv
  replace;
run;
/*** read plot_work.csv and order it by timeline for the line plot ***/
/* NOTE(review): plot_work.csv is not written by the steps visible here; presumably prepared outside SAS - confirm */
proc import datafile="d:\research\networking\plot_work.csv" dbms=CSV out=plot_work replace;
  getnames=yes;
  guessingrows=500;
run;
proc sort data=plot_work;
  by timeline;
run;

goptions reset=goptions;

/* plot title */
title1 "Diff-in-diff Plot on Diversity Work";

/* both series are drawn as joined lines of width 1, line type 1 */
symbol1 i=join color=vibg w=1 line=1;
symbol2 i=join color=red w=1 line=1;

/* framed legend without a label */
legend1 label=none frame;

/* horizontal axis runs from -36 to 36 in steps of 4; vertical label is rotated 90 degrees */
axis1 label=("Timeline") order=-36 to 36 by 4;
axis2 label=(angle=90 "Average Monthly Alpha(%)");

/* overlay ret_treatment and ret_control against timeline */
proc gplot data=plot_work;
  plot ret_treatment*timeline ret_control*timeline
       / overlay legend=legend1 haxis=axis1 vaxis=axis2;
run;
quit;


/*** own-fund columns of each matched row, flagged event=1 ***/
data hf_performance_merge1;
  set hf_performance_merge;
  keep fund_id date lag36alpha lead36alpha event;
  event = 1;
run;

/*** _match columns of each matched row, flagged event=0 ***/
data hf_performance_merge2;
  set hf_performance_merge;
  keep fund_id_match date_match lag36alpha_match lead36alpha_match event;
  event = 0;
run;

/*** give the _match columns the same names as in hf_performance_merge1 ***/
data hf_performance_merge2;
  set hf_performance_merge2;
  rename fund_id_match     = fund_id
         date_match        = date
         lag36alpha_match  = lag36alpha
         lead36alpha_match = lead36alpha;
run;

/*** stack both sides and compute lead36alpha minus lag36alpha ***/
data hf_performance_merge3;
  set hf_performance_merge1 hf_performance_merge2;
  ret_diff = lead36alpha - lag36alpha;
run;

/*** paired lag36alpha vs lead36alpha tests on each side ***/
proc ttest data=hf_performance_merge1;
  paired lag36alpha*lead36alpha;
run;
proc ttest data=hf_performance_merge2;
  paired lag36alpha*lead36alpha;
run;

/*** two-sample test of ret_diff by event (the dataset was previously picked up implicitly as the last one created) ***/
proc ttest data=hf_performance_merge3;
  class event;
  var ret_diff;
run;

/*** homoorigin_ratio section: load the origin panel and flag events ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv" dbms=CSV out=hf_performance_portfolio replace;
getnames=yes;
guessingrows=50000;
run;
/*** restrict to rows with totalmanager of at least 2 ***/
data hf_performance_portfolio;
set hf_performance_portfolio;
where totalmanager>=2;
run;

/*
  event=1 : previous row had homoorigin_ratio=1 and the current value is lower
  event=0 : previous row had homoorigin_ratio=1 and the current value is unchanged
  event stays missing otherwise.

  LAG is evaluated exactly once per row and outside any condition. Every LAG call owns a queue
  that only advances when the call executes, so calling it inside an IF / ELSE IF condition
  (as this step used to) returns stale values on rows that follow a skipped branch.
  The previous row must also carry the same fund_id, so the first row of a fund is never
  compared with the last row of another fund.
  Assumes rows are ordered by fund_id and date - TODO confirm for the imported file.
*/
data hf_performance_portfolio1;
set hf_performance_portfolio;
_prev_ratio=lag1(homoorigin_ratio);
_prev_fund=lag1(fund_id);
if homoorigin_ratio not=. and _prev_fund=fund_id and _prev_ratio=1 then do;
  if homoorigin_ratio-_prev_ratio<0 then event=1;
  else if homoorigin_ratio-_prev_ratio=0 then event=0;
end;
drop _prev_ratio _prev_fund;
run;
/* NOTE(review): unexplained constant subtracted from lag36alpha on every row - confirm intent */
data hf_performance_portfolio1;
set hf_performance_portfolio1;
lag36alpha=lag36alpha-0.003;
run;
/*** inspection copy of the event=1 rows ***/
data test;
set hf_performance_portfolio1;
where event=1;
run;

/*** event=1 rows with both lag36alpha and lead36alpha available ***/
data hf_performance_diver;
set hf_performance_portfolio1;
where event=1 and lag36alpha not=. and lead36alpha not=.;
run;
/* NOTE(review): unexplained +0.004 on lead36alpha, applied to event=1 rows only - confirm intent */
data hf_performance_diver;
set hf_performance_diver;
lead36alpha=lead36alpha+0.004;
run;

/*** helper: emits alpha_lagK=alpha_lagK_match and alpha_leadK=alpha_leadK_match for K = 1..36 ***/
%macro match_rename_pairs;
  %local k;
  %do k = 1 %to 36; alpha_lag&k=alpha_lag&k._match %end;
  %do k = 1 %to 36; alpha_lead&k=alpha_lead&k._match %end;
%mend match_rename_pairs;

/*** event=0 rows with both alphas available; every kept column except event gets a _match suffix ***/
data hf_performance_nodiver;
  set hf_performance_portfolio1;
  where event=0 and lag36alpha not=. and lead36alpha not=.;
  keep fund_id date lag36alpha lead36alpha event
       alpha_lag1-alpha_lag36 alpha_lead1-alpha_lead36 alpha;
  rename fund_id     = fund_id_match
         date        = date_match
         lag36alpha  = lag36alpha_match
         lead36alpha = lead36alpha_match
         alpha       = alpha_match
         %match_rename_pairs;
run;

/*** pair every row of hf_performance_diver with the hf_performance_nodiver rows sharing its date ***/
proc sql;
  create table hf_performance_merge as
  select *
  from hf_performance_diver as treat
       inner join hf_performance_nodiver as ctrl
       on treat.date = ctrl.date_match;
quit;

/*** relative gap between lag36alpha and the candidate's lag36alpha_match ***/
/*** (missing when lag36alpha is 0; missing values sort ahead of any number below) ***/
data hf_performance_merge;
  set hf_performance_merge;
  retdistance = abs((lag36alpha - lag36alpha_match) / lag36alpha);
run;

/*** order candidates by retdistance within fund_id and date ***/
proc sort data=hf_performance_merge noduplicates;
  by fund_id date retdistance;
run;

/*** keep only the first candidate per fund_id and date ***/
proc sort data=hf_performance_merge nodupkeys;
  by fund_id date;
run;

proc means data=hf_performance_merge;
  var lag36alpha lead36alpha lag36alpha_match lead36alpha_match;
run;
/*** helper: emits alpha_lag1_match ... alpha_lag36_match alpha_lead1_match ... alpha_lead36_match ***/
%macro match_alpha_vars;
  %local k;
  %do k = 1 %to 36; alpha_lag&k._match %end;
  %do k = 1 %to 36; alpha_lead&k._match %end;
%mend match_alpha_vars;

/*** keep only the alpha series (own and _match) and write them to graph_origin.csv ***/
data graph_origin;
  set hf_performance_merge;
  keep alpha alpha_match
       alpha_lag1-alpha_lag36 alpha_lead1-alpha_lead36
       %match_alpha_vars;
run;

proc export data=graph_origin
  outfile='d:\research\networking\graph_origin.csv'
  dbms=csv
  replace;
run;
/*** read plot_origin.csv and order it by timeline for the line plot ***/
/* NOTE(review): plot_origin.csv is not written by the steps visible here; presumably prepared outside SAS - confirm */
proc import datafile="d:\research\networking\plot_origin.csv" dbms=CSV out=plot_origin replace;
  getnames=yes;
  guessingrows=500;
run;
proc sort data=plot_origin;
  by timeline;
run;

goptions reset=goptions;

/* plot title */
title1 "Diff-in-diff Plot on Diversity Country";

/* both series are drawn as joined lines of width 1, line type 1 */
symbol1 i=join color=vibg w=1 line=1;
symbol2 i=join color=red w=1 line=1;

/* framed legend without a label */
legend1 label=none frame;

/* horizontal axis runs from -36 to 36 in steps of 4; vertical label is rotated 90 degrees */
axis1 label=("Timeline") order=-36 to 36 by 4;
axis2 label=(angle=90 "Average Monthly Alpha(%)");

/* overlay ret_treatment and ret_control against timeline */
proc gplot data=plot_origin;
  plot ret_treatment*timeline ret_control*timeline
       / overlay legend=legend1 haxis=axis1 vaxis=axis2;
run;
quit;

/*** own-fund columns of each matched row, flagged event=1 ***/
data hf_performance_merge1;
  set hf_performance_merge;
  keep fund_id date lag36alpha lead36alpha event;
  event = 1;
run;

/*** _match columns of each matched row, flagged event=0 ***/
data hf_performance_merge2;
  set hf_performance_merge;
  keep fund_id_match date_match lag36alpha_match lead36alpha_match event;
  event = 0;
run;

/*** give the _match columns the same names as in hf_performance_merge1 ***/
data hf_performance_merge2;
  set hf_performance_merge2;
  rename fund_id_match     = fund_id
         date_match        = date
         lag36alpha_match  = lag36alpha
         lead36alpha_match = lead36alpha;
run;

/*** stack both sides and compute lead36alpha minus lag36alpha ***/
data hf_performance_merge3;
  set hf_performance_merge1 hf_performance_merge2;
  ret_diff = lead36alpha - lag36alpha;
run;

/*** paired lag36alpha vs lead36alpha tests on each side ***/
proc ttest data=hf_performance_merge1;
  paired lag36alpha*lead36alpha;
run;
proc ttest data=hf_performance_merge2;
  paired lag36alpha*lead36alpha;
run;

/*** two-sample test of ret_diff by event (the dataset was previously picked up implicitly as the last one created) ***/
proc ttest data=hf_performance_merge3;
  class event;
  var ret_diff;
run;


/*** downside beta ***/
proc import datafile="d:\research\networking\hf_performance_portfolio.csv" dbms=CSV out=hf_performance_portfolio replace;
  getnames=yes;
  guessingrows=50000;
run;
proc import datafile="d:\research\networking\rolling_alpha_down.csv" dbms=CSV out=downbeta replace;
  getnames=yes;
  guessingrows=500;
run;

/*** attach the downbeta columns by fund_id and month (date = yyyymm) ***/
proc sql;
  create table hf_performance_portfolio as
  select a.*, b.*
  from hf_performance_portfolio as a
       left join downbeta as b
       on a.fund_id = b.fund_id and a.date = b.yyyymm;
quit;

/*** keep the rows where _b_mktrf is available ***/
data hf_performance_portfolio_down;
  set hf_performance_portfolio;
  where _b_mktrf not=.;
run;

/*** count and get sequence for each fund_id ***/
proc sort data=hf_performance_portfolio_down nodupkeys;
  by fund_id date;
run;

/*** compute risk and mean performance for every 24 months ***/
/*** seq_down  : running row number within fund_id (the sum statement retains it) ***/
/*** seq_down1 : index of the 24-row window the row falls in ***/
/*** fund_id_seq_down : fund_id concatenated with the window index, used as the group key ***/
data hf_performance_portfolio_down;
  set hf_performance_portfolio_down;
  by fund_id;
  if first.fund_id then seq_down = 0;
  seq_down + 1;
  seq_down1 = ceil(seq_down/24);
  fund_id_seq_down = fund_id||seq_down1;
  count_down = 1;
run;

/*** number of rows in each window, merged back onto every row ***/
proc sql;
  create table hf_performance_portfolio_down as
  select *, sum(count_down) as total_down
  from hf_performance_portfolio_down
  group by fund_id_seq_down;
quit;

/*** only windows holding exactly 24 rows are exported ***/
data hf_performance_portfolio_down;
  set hf_performance_portfolio_down;
  where total_down = 24;
run;
proc export data=hf_performance_portfolio_down
  outfile='d:\research\networking\hf_performance_portfolio_down.csv'
  dbms=csv
  replace;
run;

/*** read beta_downside.csv, keep one row per window key and write it back out ***/
/* NOTE(review): beta_downside.csv is not written by the steps visible here; presumably computed externally - confirm */
proc import datafile="d:\research\networking\beta_downside.csv" dbms=CSV out=beta_downside replace;
  getnames=yes;
run;
proc sort data=beta_downside nodupkeys;
  by fund_id_seq_down;
run;
proc export data=beta_downside
  outfile='d:\research\networking\hf_performance_portfolio_down1.csv'
  dbms=csv
  replace;
run;



/*** downside beta, origin panel ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv" dbms=CSV out=hf_performance_portfolio replace;
  getnames=yes;
  guessingrows=50000;
run;
proc import datafile="d:\research\networking\rolling_alpha_origin_down.csv" dbms=CSV out=downbeta replace;
  getnames=yes;
  guessingrows=500;
run;

/*** attach the downbeta columns by fund_id and month (date = yyyymm) ***/
proc sql;
  create table hf_performance_portfolio as
  select a.*, b.*
  from hf_performance_portfolio as a
       left join downbeta as b
       on a.fund_id = b.fund_id and a.date = b.yyyymm;
quit;

/*** keep the rows where _b_mktrf1 is available ***/
data hf_performance_portfolio_down;
  set hf_performance_portfolio;
  where _b_mktrf1 not=.;
run;

/*** count and get sequence for each fund_id ***/
proc sort data=hf_performance_portfolio_down nodupkeys;
  by fund_id date;
run;

/*** compute risk and mean performance for every 24 months ***/
/*** seq_down  : running row number within fund_id (the sum statement retains it) ***/
/*** seq_down1 : index of the 24-row window the row falls in ***/
/*** fund_id_seq_down : fund_id concatenated with the window index, used as the group key ***/
data hf_performance_portfolio_down;
  set hf_performance_portfolio_down;
  by fund_id;
  if first.fund_id then seq_down = 0;
  seq_down + 1;
  seq_down1 = ceil(seq_down/24);
  fund_id_seq_down = fund_id||seq_down1;
  count_down = 1;
run;

/*** number of rows in each window, merged back onto every row ***/
proc sql;
  create table hf_performance_portfolio_down as
  select *, sum(count_down) as total_down
  from hf_performance_portfolio_down
  group by fund_id_seq_down;
quit;

/*** only windows holding exactly 24 rows are exported ***/
data hf_performance_portfolio_down;
  set hf_performance_portfolio_down;
  where total_down = 24;
run;
proc export data=hf_performance_portfolio_down
  outfile='d:\research\networking\hf_performance_portfolio_origin_down.csv'
  dbms=csv
  replace;
run;

/*** read beta_downside_origin.csv, keep one row per window key and write it back out ***/
/* NOTE(review): beta_downside_origin.csv is not written by the steps visible here; presumably computed externally - confirm */
proc import datafile="d:\research\networking\beta_downside_origin.csv" dbms=CSV out=beta_downside replace;
  getnames=yes;
run;
proc sort data=beta_downside nodupkeys;
  by fund_id_seq_down;
run;
proc export data=beta_downside
  outfile='d:\research\networking\hf_performance_portfolio_origin_down1.csv'
  dbms=csv
  replace;
run;

/*** table 1 summary stats ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_sat.csv" dbms=CSV out=hf_performance_portfolio replace;
  getnames=yes;
  guessingrows=500000;
run;

/*** rows with totalmanager of at least 2; sat is the midpoint of sat25 and sat75, return1 is return times 100 ***/
data hf_performance_portfolio;
  set hf_performance_portfolio;
  where totalmanager>=2;
  sat     = (sat25+sat75)/2;
  return1 = return*100;
run;

/*** mean, quartiles and standard deviation of the listed variables ***/
proc means data=hf_performance_portfolio mean p25 median p75 std;
  var sat return1 managementfee incentivefee highwatermark lockupperiod
      leveraged age_hf redemptionfrequency size;
run;

/*** define hot and cold inception ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv" dbms=CSV out=hf_performance_portfolio replace;
  getnames=yes;
  guessingrows=50000;
run;

/*** columns needed to average lag36flow and lag36return per date and primarycategory ***/
data hf_performance_strategy;
  set hf_performance_portfolio;
  keep strategy primarycategory date lag36flow lag36return fund_id;
run;

proc sort data=hf_performance_strategy;
  by date primarycategory fund_id;
run;

/*** group means, merged back onto every row of the group ***/
proc sql;
  create table hf_performance_strategy as
  select *,
         mean(lag36flow)   as meanlag36flow,
         mean(lag36return) as meanlag36return
  from hf_performance_strategy
  group by date, primarycategory;
quit;

/*** one row per date and primarycategory ***/
proc sort data=hf_performance_strategy nodupkeys;
  by date primarycategory;
run;

/*** rank both means into 10 groups within each date; DESCENDING gives the largest values the lowest rank ***/
proc rank data=hf_performance_strategy out=hf_performance_strategy1 groups=10 descending;
  by date;
  var meanlag36flow meanlag36return;
  ranks lag36flow_rank lag36return_rank;
run;

/*** hot=1 when both ranks are 7 or above, otherwise 0 (also 0 when a rank is missing) ***/
/* NOTE(review): because the ranking is DESCENDING, ranks >= 7 are the groups with the LOWEST mean flow
   and return; if "hot" is meant to mark high flow and high return this condition is inverted - confirm */
data hf_performance_strategy1;
  set hf_performance_strategy1;
  hot = (lag36return_rank>=7 and lag36flow_rank>=7);
run;

proc export data=hf_performance_strategy1
  outfile='d:\research\networking\hf_performance_strategy.csv'
  dbms=csv
  replace;
run;

/*
  attach_strategy: shared pipeline for the four *_sat.csv panels, which previously existed as
  four copies differing only in file names.
    in=   base name (no extension) of the CSV read from d:\research\networking
    out=  base name (no extension) of the CSV written to d:\research\networking
  Steps: import -> keep totalmanager>=2 -> left join hf_performance_strategy1 on date and
  primarycategory -> sort by fund_id date -> keep the first row per fund_id -> export.
  Requires hf_performance_strategy1 to exist. Leaves the result in WORK.hf_performance_portfolio.
*/
%macro attach_strategy(in=, out=);
  proc import datafile="d:\research\networking\&in..csv" dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=500000;
  run;

  data hf_performance_portfolio;
    set hf_performance_portfolio;
    where totalmanager>=2;
  run;

  /* columns present on both sides are taken from the left table (SAS keeps the first occurrence) */
  proc sql;
    create table hf_performance_portfolio as
    select *
    from hf_performance_portfolio as a left join hf_performance_strategy1 as b
    on a.date=b.date and a.primarycategory=b.primarycategory;
  quit;

  /* order by date within fund so the de-duplication below keeps each fund's earliest row */
  proc sort data=hf_performance_portfolio;
    by fund_id date;
  run;

  proc sort data=hf_performance_portfolio nodupkeys;
    by fund_id;
  run;

  proc export data=hf_performance_portfolio
    outfile="d:\research\networking\&out..csv"
    dbms=csv
    replace;
  run;
%mend attach_strategy;

%attach_strategy(in=hf_performance_portfolio_sat,        out=hf_performance_portfolio_strategy)
%attach_strategy(in=hf_performance_portfolio_origin_sat, out=hf_performance_portfolio_origin_strategy)
%attach_strategy(in=hf_performance_portfolio_gender_sat, out=hf_performance_portfolio_gender_strategy)
%attach_strategy(in=hf_performance_portfolio_eth_sat,    out=hf_performance_portfolio_eth_strategy)



/*** portfolio sorting data ***/
proc import datafile="d:\research\networking\hf_performance_portfolio.csv" dbms=CSV out=hf_performance_portfolio replace;
  getnames=yes;
  guessingrows=50000;
run;

/*** one row per manager_name ***/
proc sort data=hf_performance_portfolio out=manager nodupkeys;
  by manager_name;
run;

/* NOTE(review): the two steps below used to omit DATA= and therefore ran on the most recently
   created dataset, which is manager, not hf_performance_portfolio. The input is now spelled out
   with unchanged behaviour; confirm whether the full panel was intended instead. */
proc means data=manager mean p25 median p75 std;
  var size;
run;

/*** one row per fund_id, taken from manager ***/
proc sort data=manager out=funds nodupkeys;
  by fund_id;
run;

/*** diver_edu_ratio is the complement of homo_edu_ratio ***/
data funds;
  set funds;
  diver_edu_ratio = 1-homo_edu_ratio;
run;

/*** distribution of diver_edu_ratio overall, then by strategy ***/
proc means data=funds N mean p25 median p75 std;
  var diver_edu_ratio;
run;
proc means data=funds N mean p25 median p75 std;
  class strategy;
  var diver_edu_ratio;
run;

/*** assign each fund-year an education homophily group homoedu_rank1 in 1..5 ***/
proc sort data=hf_performance_portfolio;
by year month;
run;

/*** remove columns that may be left over from an earlier run ***/
data hf_performance_portfolio;
set hf_performance_portfolio;
drop homoedu_rank meanreturn meanalpha meanflow;
run;

/*** terciles (0,1,2) of homo_edu_ratio within year, using January rows only; ***/
/*** ratios of exactly 0 or 1 are excluded here and handled as end groups below ***/
proc rank groups=3 data=hf_performance_portfolio out=hf_performance_portfolio1; 
var homo_edu_ratio;
ranks homoedu_rank;
where month=1 and homo_edu_ratio not=1 and homo_edu_ratio not=0; 
by year;
run;

/*** map terciles to groups 4,3,2 so that a higher homophily ratio gets a lower group number.  ***/
/*** This matches the end groups set below (ratio=1 -> 1, ratio=0 -> 5) and the 4-rank mapping ***/
/*** used for the work, origin and eth rankings; the previous 2+rank mapping reversed the      ***/
/*** interior groups relative to the end groups.                                               ***/
data hf_performance_portfolio1;
set hf_performance_portfolio1;
homoedu_rank1=4-homoedu_rank;
keep fund_id year homoedu_rank1;
run;

/*** carry the January group to every row of the same fund and year ***/
PROC SQL; 
CREATE TABLE hf_performance_portfolio AS
SELECT *
FROM hf_performance_portfolio AS a LEFT JOIN hf_performance_portfolio1 AS b
ON a.fund_id=b.fund_id and a.year=b.year;
QUIT;

/*** end groups: fully homophilous rows -> 1, zero homophily rows -> 5 ***/
data hf_performance_portfolio;
set hf_performance_portfolio;
if homo_edu_ratio=1 then homoedu_rank1=1;
if homo_edu_ratio=0 then homoedu_rank1=5;
run;

/*** drop rows that received no group; countyr is a constant 1 on every remaining row ***/
data hf_performance_portfolio;
set hf_performance_portfolio;
where homoedu_rank1 not=.;
countyr=1;
run;

/*** working copy of the ranked panel without its factor columns, which are re-attached below ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/*** keep rows whose date equals a yyyymm in the factor file and append the factor columns ***/
proc sql;
    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p
             inner join sevenfactors as f
             on p.date=f.yyyymm;
quit;

/*** re-read the factor file and build three copies of the seven factors, ***/
/*** each set to 0 outside one sub-period                                 ***/
proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/*** suffix 1: values kept up to 200004, 0 afterwards ***/
data sevenfactors1;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm>200004 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    /*** KEEP uses the original names; RENAME is applied when the row is written ***/
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf1 smb=smb1 tenyrcmtchange=tenyrcmtchange1 baacreditchange=baacreditchange1
           ptfsbd=ptfsbd1 ptfsfx=ptfsfx1 ptfscom=ptfscom1;
run;

/*** suffix 2: values kept for 200005 through 200809, 0 elsewhere ***/
data sevenfactors2;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm<=200004 or yyyymm>200809 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf2 smb=smb2 tenyrcmtchange=tenyrcmtchange2 baacreditchange=baacreditchange2
           ptfsbd=ptfsbd2 ptfsfx=ptfsfx2 ptfscom=ptfscom2;
run;

/*** suffix 3: values kept after 200809, 0 up to and including it ***/
data sevenfactors3;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm<=200809 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf3 smb=smb3 tenyrcmtchange=tenyrcmtchange3 baacreditchange=baacreditchange3
           ptfsbd=ptfsbd3 ptfsfx=ptfsfx3 ptfscom=ptfscom3;
run;

proc import datafile="d:\research\networking\anomaly11.csv" dbms=CSV out=anomaly11 replace;
    getnames=yes;
    guessingrows=500;
run;

/*** rebuild date as yyyymm from year and month so it matches the factor tables ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    date=100*year+month;
run;

/*** attach the three sub-period factor sets and the anomaly file, one inner join at a time; ***/
/*** rows without a matching yyyymm are dropped at each step                                 ***/
proc sql;
    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors1 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors2 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors3 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join anomaly11 as f
             on p.date=f.yyyymm;
quit;

/*** add the education diversity ratio and reduce to the columns that are exported ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    diver_edu_ratio=1-homo_edu_ratio;
    keep fund_id year month return alpha homo_edu_ratio diver_edu_ratio strategy
         rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom
         hml umd rmw cma ps_liquidity bab macro call_otm put_otm emfactor
         mktrf1 smb1 tenyrcmtchange1 baacreditchange1 ptfsbd1 ptfsfx1 ptfscom1
         mktrf2 smb2 tenyrcmtchange2 baacreditchange2 ptfsbd2 ptfsfx2 ptfscom2
         mktrf3 smb3 tenyrcmtchange3 baacreditchange3 ptfsbd3 ptfsfx3 ptfscom3
         _eq2_r2;
run;

/*** side table with one row per fund_id; the exported table is left untouched ***/
proc sort data=hf_performance_portfolio10 out=fund nodupkey;
    by fund_id;
run;

proc export data=hf_performance_portfolio10
    outfile='d:\research\networking\portfolio_sorting_edu.csv'
    dbms=csv replace;
run;


/*** work sample: import the panel and summarise work diversity at fund level ***/
proc import datafile="d:\research\networking\hf_performance_portfolio.csv" dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

/*** one row per fund_id ***/
proc sort data=hf_performance_portfolio out=funds nodupkey;
    by fund_id;
run;

/*** diversity is the complement of the work homophily ratio ***/
data funds;
    set funds;
    diver_work_ratio=1-homo_work_ratio;
run;

/*** distribution of diver_work_ratio, overall and by strategy ***/
proc means data=funds N mean p25 median p75 std;
    var diver_work_ratio;
run;
proc means data=funds N mean p25 median p75 std;
    class strategy;
    var diver_work_ratio;
run;
/*** assign each fund-year a work homophily group homowork_rank1 in 1..5 ***/
proc sort data=hf_performance_portfolio;
    by year month;
run;

data hf_performance_portfolio;
    set hf_performance_portfolio;
    drop meanreturn meanalpha meanflow;
run;

/*** within-year terciles (0,1,2) from January rows; ratios of exactly 0 or 1 are set aside ***/
proc rank data=hf_performance_portfolio out=hf_performance_portfolio1 groups=3;
    where month=1 and homo_work_ratio ne 1 and homo_work_ratio ne 0;
    by year;
    var homo_work_ratio;
    ranks homowork_rank;
run;

/*** terciles 0,1,2 become groups 4,3,2 ***/
data hf_performance_portfolio1;
    set hf_performance_portfolio1;
    homowork_rank1=4-homowork_rank;
    keep fund_id year homowork_rank1;
run;

/*** carry the January group to every row of the same fund and year ***/
proc sql;
    create table hf_performance_portfolio as
        select *
        from hf_performance_portfolio as a
             left join hf_performance_portfolio1 as b
             on a.fund_id=b.fund_id and a.year=b.year;
quit;

/*** end groups (ratio=1 -> 1, ratio=0 -> 5), then keep only rows that have a group ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    if homo_work_ratio=1 then homowork_rank1=1;
    if homo_work_ratio=0 then homowork_rank1=5;
    if homowork_rank1 ne .;
    countyr=1;
run;

/*** working copy of the ranked panel without its factor columns, which are re-attached below ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/*** keep rows whose date equals a yyyymm in the factor file and append the factor columns ***/
proc sql;
    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p
             inner join sevenfactors as f
             on p.date=f.yyyymm;
quit;
/*** re-read the factor file and build three copies of the seven factors, ***/
/*** each set to 0 outside one sub-period                                 ***/
proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/*** suffix 1: values kept up to 200004, 0 afterwards ***/
data sevenfactors1;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm>200004 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    /*** KEEP uses the original names; RENAME is applied when the row is written ***/
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf1 smb=smb1 tenyrcmtchange=tenyrcmtchange1 baacreditchange=baacreditchange1
           ptfsbd=ptfsbd1 ptfsfx=ptfsfx1 ptfscom=ptfscom1;
run;

/*** suffix 2: values kept for 200005 through 200809, 0 elsewhere ***/
data sevenfactors2;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm<=200004 or yyyymm>200809 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf2 smb=smb2 tenyrcmtchange=tenyrcmtchange2 baacreditchange=baacreditchange2
           ptfsbd=ptfsbd2 ptfsfx=ptfsfx2 ptfscom=ptfscom2;
run;

/*** suffix 3: values kept after 200809, 0 up to and including it ***/
data sevenfactors3;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm<=200809 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf3 smb=smb3 tenyrcmtchange=tenyrcmtchange3 baacreditchange=baacreditchange3
           ptfsbd=ptfsbd3 ptfsfx=ptfsfx3 ptfscom=ptfscom3;
run;

proc import datafile="d:\research\networking\anomaly11.csv" dbms=CSV out=anomaly11 replace;
    getnames=yes;
    guessingrows=500;
run;

/*** rebuild date as yyyymm from year and month so it matches the factor tables ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    date=100*year+month;
run;

/*** attach the three sub-period factor sets and the anomaly file, one inner join at a time; ***/
/*** rows without a matching yyyymm are dropped at each step                                 ***/
proc sql;
    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors1 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors2 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors3 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join anomaly11 as f
             on p.date=f.yyyymm;
quit;
/*** add the work diversity ratio and reduce to the columns that are exported ***/
data hf_performance_portfolio10;
set hf_performance_portfolio10;
keep fund_id year month return alpha homo_work_ratio diver_work_ratio strategy rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom hml umd rmw cma ps_liquidity bab macro call_otm put_otm emfactor
mktrf1 smb1 tenyrcmtchange1 baacreditchange1 ptfsbd1 ptfsfx1 ptfscom1 mktrf2 smb2 tenyrcmtchange2 baacreditchange2 ptfsbd2 ptfsfx2 ptfscom2 mktrf3 smb3 tenyrcmtchange3 baacreditchange3 ptfsbd3 ptfsfx3 ptfscom3 _eq2_r2;
diver_work_ratio=1-homo_work_ratio;
run;

/*** side table with one row per fund_id of the work sample.                        ***/
/*** The input is named explicitly and written with OUT=: the previous DATA=fund    ***/
/*** re-sorted whatever fund table an earlier section had left behind instead of    ***/
/*** deriving it from the table built here.                                         ***/
proc sort data=hf_performance_portfolio10 out=fund nodupkeys;
by fund_id;
run;

proc export data=hf_performance_portfolio10
   outfile='d:\research\networking\portfolio_sorting_work.csv'
   dbms=csv
   replace;
run;

/*** origin sample: import the panel and summarise origin diversity at fund level ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_origin.csv" dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

/*** one row per fund_id ***/
proc sort data=hf_performance_portfolio out=funds nodupkey;
    by fund_id;
run;

/*** diversity is the complement of the origin homophily ratio ***/
data funds;
    set funds;
    diver_origin_ratio=1-homoorigin_ratio;
run;

/*** distribution of diver_origin_ratio, overall and by strategy ***/
proc means data=funds N mean p25 median p75 std;
    var diver_origin_ratio;
run;
proc means data=funds N mean p25 median p75 std;
    class strategy;
    var diver_origin_ratio;
run;

/*** rebuilds funds from the panel; this overwrites the version holding diver_origin_ratio ***/
proc sort data=hf_performance_portfolio out=funds nodupkey;
    by fund_id;
run;

proc sort data=hf_performance_portfolio;
    by year month;
run;

/*** assign each fund-year an origin homophily group homoorigin_rank1 in 1..5 ***/
/*** within-year terciles (0,1,2) from January rows; ratios of exactly 0 or 1 are set aside ***/
proc rank data=hf_performance_portfolio out=hf_performance_portfolio1 groups=3;
    where month=1 and homoorigin_ratio ne 1 and homoorigin_ratio ne 0;
    by year;
    var homoorigin_ratio;
    ranks homoorigin_rank;
run;

/*** terciles 0,1,2 become groups 4,3,2 ***/
data hf_performance_portfolio1;
    set hf_performance_portfolio1;
    homoorigin_rank1=4-homoorigin_rank;
    keep fund_id year homoorigin_rank1;
run;

/*** carry the January group to every row of the same fund and year ***/
proc sql;
    create table hf_performance_portfolio as
        select *
        from hf_performance_portfolio as a
             left join hf_performance_portfolio1 as b
             on a.fund_id=b.fund_id and a.year=b.year;
quit;

/*** end groups (ratio=1 -> 1, ratio=0 -> 5), then keep only rows that have a group ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    if homoorigin_ratio=1 then homoorigin_rank1=1;
    if homoorigin_ratio=0 then homoorigin_rank1=5;
    if homoorigin_rank1 ne .;
    countyr=1;
run;

/*** working copy of the ranked panel without its factor columns, which are re-attached below ***/
data hf_performance_portfolio10;
set hf_performance_portfolio;
drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
getnames=yes;
guessingrows=500;
run;

/*** keep rows whose date equals a yyyymm in the factor file and append the factor columns. ***/
/*** The import and this join used to be pasted twice in a row; the second pass added no    ***/
/*** new columns and would have multiplied rows had yyyymm repeated in the factor file,     ***/
/*** so the join now runs once, as in the other samples.                                    ***/
proc sql;
create table hf_performance_portfolio10 as
select * 
from hf_performance_portfolio10, sevenfactors
where hf_performance_portfolio10.date=sevenfactors.yyyymm;
quit;
/*** re-read the factor file and build three copies of the seven factors, ***/
/*** each set to 0 outside one sub-period                                 ***/
proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/*** suffix 1: values kept up to 200004, 0 afterwards ***/
data sevenfactors1;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm>200004 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    /*** KEEP uses the original names; RENAME is applied when the row is written ***/
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf1 smb=smb1 tenyrcmtchange=tenyrcmtchange1 baacreditchange=baacreditchange1
           ptfsbd=ptfsbd1 ptfsfx=ptfsfx1 ptfscom=ptfscom1;
run;

/*** suffix 2: values kept for 200005 through 200809, 0 elsewhere ***/
data sevenfactors2;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm<=200004 or yyyymm>200809 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf2 smb=smb2 tenyrcmtchange=tenyrcmtchange2 baacreditchange=baacreditchange2
           ptfsbd=ptfsbd2 ptfsfx=ptfsfx2 ptfscom=ptfscom2;
run;

/*** suffix 3: values kept after 200809, 0 up to and including it ***/
data sevenfactors3;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm<=200809 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf3 smb=smb3 tenyrcmtchange=tenyrcmtchange3 baacreditchange=baacreditchange3
           ptfsbd=ptfsbd3 ptfsfx=ptfsfx3 ptfscom=ptfscom3;
run;

proc import datafile="d:\research\networking\anomaly11.csv" dbms=CSV out=anomaly11 replace;
    getnames=yes;
    guessingrows=500;
run;

/*** rebuild date as yyyymm from year and month so it matches the factor tables ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    date=100*year+month;
run;

/*** attach the three sub-period factor sets and the anomaly file, one inner join at a time; ***/
/*** rows without a matching yyyymm are dropped at each step                                 ***/
proc sql;
    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors1 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors2 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors3 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join anomaly11 as f
             on p.date=f.yyyymm;
quit;
/*** add the origin diversity ratio and reduce to the columns that are exported ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    diver_origin_ratio=1-homoorigin_ratio;
    keep fund_id year month return alpha homoorigin_ratio diver_origin_ratio strategy
         rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom
         hml umd rmw cma ps_liquidity bab macro call_otm put_otm emfactor
         mktrf1 smb1 tenyrcmtchange1 baacreditchange1 ptfsbd1 ptfsfx1 ptfscom1
         mktrf2 smb2 tenyrcmtchange2 baacreditchange2 ptfsbd2 ptfsfx2 ptfscom2
         mktrf3 smb3 tenyrcmtchange3 baacreditchange3 ptfsbd3 ptfsfx3 ptfscom3
         _eq2_r2;
run;

/*** side table with one row per fund_id; the exported table is left untouched ***/
proc sort data=hf_performance_portfolio10 out=fund nodupkey;
    by fund_id;
run;

proc export data=hf_performance_portfolio10
    outfile='d:\research\networking\portfolio_sorting_origin.csv'
    dbms=csv replace;
run;

/*** gender sample: import the panel and assign homogender_rank1 ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_gender.csv" dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

proc sort data=hf_performance_portfolio;
    by year month;
run;

/*** within-year quartiles (0..3) from January rows; a ratio of exactly 1 is set aside ***/
proc rank data=hf_performance_portfolio out=hf_performance_portfolio1 groups=4;
    where month=1 and homo_gender_ratio ne 1;
    by year;
    var homo_gender_ratio;
    ranks homogender_rank;
run;

/*** quartiles 0,1,2,3 become groups 5,4,3,2 ***/
data hf_performance_portfolio1;
    set hf_performance_portfolio1;
    homogender_rank1=5-homogender_rank;
    keep fund_id year homogender_rank1;
run;

/*** carry the January group to every row of the same fund and year ***/
proc sql;
    create table hf_performance_portfolio as
        select *
        from hf_performance_portfolio as a
             left join hf_performance_portfolio1 as b
             on a.fund_id=b.fund_id and a.year=b.year;
quit;

/*** rows with a ratio of exactly 1 form group 1; rows without a group are not removed here ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    if homo_gender_ratio=1 then homogender_rank1=1;
run;

/*** working copy of the ranked panel without its factor columns, which are re-attached below ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/*** keep rows whose date equals a yyyymm in the factor file and append the factor columns ***/
proc sql;
    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p
             inner join sevenfactors as f
             on p.date=f.yyyymm;
quit;
/*** re-read the factor file and build three copies of the seven factors, ***/
/*** each set to 0 outside one sub-period                                 ***/
proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/*** suffix 1: values kept up to 200004, 0 afterwards ***/
data sevenfactors1;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm>200004 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    /*** KEEP uses the original names; RENAME is applied when the row is written ***/
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf1 smb=smb1 tenyrcmtchange=tenyrcmtchange1 baacreditchange=baacreditchange1
           ptfsbd=ptfsbd1 ptfsfx=ptfsfx1 ptfscom=ptfscom1;
run;

/*** suffix 2: values kept for 200005 through 200809, 0 elsewhere ***/
data sevenfactors2;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm<=200004 or yyyymm>200809 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf2 smb=smb2 tenyrcmtchange=tenyrcmtchange2 baacreditchange=baacreditchange2
           ptfsbd=ptfsbd2 ptfsfx=ptfsfx2 ptfscom=ptfscom2;
run;

/*** suffix 3: values kept after 200809, 0 up to and including it ***/
data sevenfactors3;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm<=200809 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf3 smb=smb3 tenyrcmtchange=tenyrcmtchange3 baacreditchange=baacreditchange3
           ptfsbd=ptfsbd3 ptfsfx=ptfsfx3 ptfscom=ptfscom3;
run;

proc import datafile="d:\research\networking\anomaly11.csv" dbms=CSV out=anomaly11 replace;
    getnames=yes;
    guessingrows=500;
run;

/*** rebuild date as yyyymm from year and month so it matches the factor tables ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    date=100*year+month;
run;

/*** attach the three sub-period factor sets and the anomaly file, one inner join at a time; ***/
/*** rows without a matching yyyymm are dropped at each step                                 ***/
proc sql;
    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors1 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors2 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors3 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join anomaly11 as f
             on p.date=f.yyyymm;
quit;
/*** add the gender diversity ratio and reduce to the columns that are exported ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    diver_gender_ratio=1-homo_gender_ratio;
    keep fund_id year month return alpha homo_gender_ratio diver_gender_ratio strategy
         rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom
         hml umd rmw cma ps_liquidity bab macro call_otm put_otm emfactor
         mktrf1 smb1 tenyrcmtchange1 baacreditchange1 ptfsbd1 ptfsfx1 ptfscom1
         mktrf2 smb2 tenyrcmtchange2 baacreditchange2 ptfsbd2 ptfsfx2 ptfscom2
         mktrf3 smb3 tenyrcmtchange3 baacreditchange3 ptfsbd3 ptfsfx3 ptfscom3
         _eq2_r2;
run;

/*** side table with one row per fund_id; the exported table is left untouched ***/
proc sort data=hf_performance_portfolio10 out=fund nodupkey;
    by fund_id;
run;

proc export data=hf_performance_portfolio10
    outfile='d:\research\networking\portfolio_sorting_gender.csv'
    dbms=csv replace;
run;

/*** eth sample: import the panel and assign homoeth_rank1 ***/
proc import datafile="d:\research\networking\hf_performance_portfolio_eth.csv" dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

proc sort data=hf_performance_portfolio;
    by year month;
run;

/*** within-year terciles (0,1,2) from January rows; ratios of exactly 0 or 1 are set aside ***/
proc rank data=hf_performance_portfolio out=hf_performance_portfolio1 groups=3;
    where month=1 and homoeth_ratio ne 1 and homoeth_ratio ne 0;
    by year;
    var homoeth_ratio;
    ranks homoeth_rank;
run;

/*** terciles 0,1,2 become groups 4,3,2 ***/
data hf_performance_portfolio1;
    set hf_performance_portfolio1;
    homoeth_rank1=4-homoeth_rank;
    keep fund_id year homoeth_rank1;
run;

/*** carry the January group to every row of the same fund and year ***/
proc sql;
    create table hf_performance_portfolio as
        select *
        from hf_performance_portfolio as a
             left join hf_performance_portfolio1 as b
             on a.fund_id=b.fund_id and a.year=b.year;
quit;

/*** end groups: ratio=1 -> 1, ratio=0 -> 5; rows without a group are not removed here ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    if homoeth_ratio=1 then homoeth_rank1=1;
    if homoeth_ratio=0 then homoeth_rank1=5;
run;

/*** working copy of the ranked panel without its factor columns, which are re-attached below ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio;
    drop rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
run;

proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/*** keep rows whose date equals a yyyymm in the factor file and append the factor columns ***/
proc sql;
    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p
             inner join sevenfactors as f
             on p.date=f.yyyymm;
quit;
/*** re-read the factor file and build three copies of the seven factors, ***/
/*** each set to 0 outside one sub-period                                 ***/
proc import datafile="d:\research\testosterone\7factors.csv" dbms=CSV out=sevenfactors replace;
    getnames=yes;
    guessingrows=500;
run;

/*** suffix 1: values kept up to 200004, 0 afterwards ***/
data sevenfactors1;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm>200004 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    /*** KEEP uses the original names; RENAME is applied when the row is written ***/
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf1 smb=smb1 tenyrcmtchange=tenyrcmtchange1 baacreditchange=baacreditchange1
           ptfsbd=ptfsbd1 ptfsfx=ptfsfx1 ptfscom=ptfscom1;
run;

/*** suffix 2: values kept for 200005 through 200809, 0 elsewhere ***/
data sevenfactors2;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm<=200004 or yyyymm>200809 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf2 smb=smb2 tenyrcmtchange=tenyrcmtchange2 baacreditchange=baacreditchange2
           ptfsbd=ptfsbd2 ptfsfx=ptfsfx2 ptfscom=ptfscom2;
run;

/*** suffix 3: values kept after 200809, 0 up to and including it ***/
data sevenfactors3;
    set sevenfactors;
    array fac{*} mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    if yyyymm<=200809 then do _k=1 to dim(fac);
        fac{_k}=0;
    end;
    keep yyyymm mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom;
    rename mktrf=mktrf3 smb=smb3 tenyrcmtchange=tenyrcmtchange3 baacreditchange=baacreditchange3
           ptfsbd=ptfsbd3 ptfsfx=ptfsfx3 ptfscom=ptfscom3;
run;

proc import datafile="d:\research\networking\anomaly11.csv" dbms=CSV out=anomaly11 replace;
    getnames=yes;
    guessingrows=500;
run;

/*** rebuild date as yyyymm from year and month so it matches the factor tables ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    date=100*year+month;
run;

/*** attach the three sub-period factor sets and the anomaly file, one inner join at a time; ***/
/*** rows without a matching yyyymm are dropped at each step                                 ***/
proc sql;
    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors1 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors2 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join sevenfactors3 as f
             on p.date=f.yyyymm;

    create table hf_performance_portfolio10 as
        select *
        from hf_performance_portfolio10 as p inner join anomaly11 as f
             on p.date=f.yyyymm;
quit;
/*** add the eth diversity ratio and reduce to the columns that are exported ***/
data hf_performance_portfolio10;
    set hf_performance_portfolio10;
    diver_eth_ratio=1-homoeth_ratio;
    keep fund_id year month return alpha homoeth_ratio diver_eth_ratio strategy
         rf mktrf smb tenyrcmtchange baacreditchange ptfsbd ptfsfx ptfscom
         hml umd rmw cma ps_liquidity bab macro call_otm put_otm emfactor
         mktrf1 smb1 tenyrcmtchange1 baacreditchange1 ptfsbd1 ptfsfx1 ptfscom1
         mktrf2 smb2 tenyrcmtchange2 baacreditchange2 ptfsbd2 ptfsfx2 ptfscom2
         mktrf3 smb3 tenyrcmtchange3 baacreditchange3 ptfsbd3 ptfsfx3 ptfscom3
         _eq2_r2;
run;

/*** side table with one row per fund_id; the exported table is left untouched ***/
proc sort data=hf_performance_portfolio10 out=fund nodupkey;
    by fund_id;
run;

proc export data=hf_performance_portfolio10
    outfile='d:\research\networking\portfolio_sorting_eth.csv'
    dbms=csv replace;
run;

/*** reconcile sorting results ***/
/*** edu: reload the exported sorting file and order it by fund and calendar month ***/
proc import datafile="d:\research\networking\portfolio_sorting_edu.csv" dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

proc sort data=hf_performance_portfolio;
    by fund_id year month;
run;


/*** Add diver_edu_ratio_lag1 and diver_edu_ratio_lag2: the value of diver_edu_ratio one and two rows earlier ***/
/*** within the same fund_id. The input must already be ordered by fund_id (BY statement).                    ***/
/*** NOTE(review): the lags are row-based, so they equal the previous one/two months only if no month is missing. ***/
data  hf_performance_portfolio(drop=i count);
set hf_performance_portfolio;
by fund_id;
array x(*) diver_edu_ratio_lag1-diver_edu_ratio_lag2;
/*** LAG is called on every row so its queue advances unconditionally ***/
diver_edu_ratio_lag1=lag1(diver_edu_ratio);
diver_edu_ratio_lag2=lag2(diver_edu_ratio);
/*** count is the row number within the fund (restarted at 1 on the first row of each fund_id) ***/
if first.fund_id then count=1;
/*** blank the lags that would reach into the previous fund: both on row 1, only lag2 on row 2 ***/
do i=count to dim(x);
x(i)=.;
end;
/*** sum statement: count is retained across rows and incremented here ***/
count + 1;
run;
/*** order by year for the BY-group ranking that follows, then keep years after 1996 ***/
proc sort data=hf_performance_portfolio;
    by year;
run;

data hf_performance_portfolio;
    set hf_performance_portfolio;
    if year>1996;
run;

/*** assign each fund-year a diversity group diveredu_rank1 in 1..5 from the lagged ratio ***/
/*** within-year terciles (0,1,2) from January rows; lagged ratios of exactly 0 or 1 are set aside ***/
proc rank data=hf_performance_portfolio out=hf_performance_portfolio1 groups=3;
    where month=1 and diver_edu_ratio_lag1 ne 1 and diver_edu_ratio_lag1 ne 0;
    by year;
    var diver_edu_ratio_lag1;
    ranks diveredu_rank;
run;

/*** terciles 0,1,2 become groups 4,3,2 ***/
data hf_performance_portfolio1;
    set hf_performance_portfolio1;
    diveredu_rank1=4-diveredu_rank;
    keep fund_id year diveredu_rank1;
run;

/*** carry the January group to every row of the same fund and year ***/
proc sql;
    create table hf_performance_portfolio as
        select *
        from hf_performance_portfolio as a
             left join hf_performance_portfolio1 as b
             on a.fund_id=b.fund_id and a.year=b.year;
quit;

/*** end groups are set row by row from that row's own lagged ratio (1 -> group 1, 0 -> group 5); ***/
/*** rows that still have no group are then dropped                                                ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    if diver_edu_ratio_lag1=1 then diveredu_rank1=1;
    if diver_edu_ratio_lag1=0 then diveredu_rank1=5;
    if diveredu_rank1 ne .;
run;
/*** rank1..rank5: for each diversity group, one row per fund_id that ever appears in it ***/
proc sort data=hf_performance_portfolio(where=(diveredu_rank1=1)) out=rank1 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diveredu_rank1=2)) out=rank2 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diveredu_rank1=3)) out=rank3 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diveredu_rank1=4)) out=rank4 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diveredu_rank1=5)) out=rank5 nodupkey;
    by fund_id;
run;

/*** mean return, alpha and R2 of each diversity group in each month, merged back onto every row ***/
proc sql;
    create table hf_performance_portfolio as
        select *,
               mean(return)  as meanreturn,
               mean(alpha)   as meanalpha,
               mean(_eq2_r2) as meanr2
        from hf_performance_portfolio
        group by diveredu_rank1, year, month;
quit;

/*** group return in excess of rf, and a yyyymm date key ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    excessreturn=meanreturn-rf;
    date=100*year+month;
run;

/*** reduce to one row per group and month; fund-level columns on the surviving row come from ***/
/*** whichever fund row happened to be first in that group-month                              ***/
proc sort data=hf_performance_portfolio nodupkey;
    by diveredu_rank1 year month;
run;

/*** summary statistics and one-sample t-tests for each group ***/
proc means data=hf_performance_portfolio;
    by diveredu_rank1;
    var excessreturn meanalpha meanr2;
run;

proc ttest data=hf_performance_portfolio;
    by diveredu_rank1;
    var excessreturn;
run;

/*** two-sample t-tests comparing group 1 with group 5 ***/
proc ttest data=hf_performance_portfolio;
    where diveredu_rank1 in (1, 5);
    class diveredu_rank1;
    var excessreturn;
run;

proc ttest data=hf_performance_portfolio;
    where diveredu_rank1 in (1, 5);
    class diveredu_rank1;
    var meanalpha;
run;

proc export data=hf_performance_portfolio
    outfile='d:\research\networking\hf_performance_portfolio10_edu.csv'
    dbms=csv replace;
run;

/*** split out group 1 (all columns) and group 5 (excess return, date, fund_id only).     ***/
/*** On output data sets KEEP= is applied before RENAME=, so KEEP= lists the original name. ***/
data hf_performance_portfolio_1 (rename=(excessreturn=excessreturn1))
     hf_performance_portfolio_5 (keep=excessreturn date fund_id rename=(excessreturn=excessreturn5));
    set hf_performance_portfolio;
    if diveredu_rank1=1 then output hf_performance_portfolio_1;
    else if diveredu_rank1=5 then output hf_performance_portfolio_5;
run;

/*** pair the two groups month by month; duplicate column names keep the group 1 version ***/
proc sql;
    create table hf_performance_portfolio_s as
        select *
        from hf_performance_portfolio_1 as g1
             inner join hf_performance_portfolio_5 as g5
             on g1.date=g5.date;
quit;

/*** spread: group 1 excess return minus group 5 excess return ***/
data hf_performance_portfolio_s;
    set hf_performance_portfolio_s;
    excessreturn_s=excessreturn1-excessreturn5;
run;

proc export data=hf_performance_portfolio_s
    outfile='d:\research\networking\hf_performance_portfolio_s_edu.csv'
    dbms=csv replace;
run;

/*** work ***/
/*** read the fund-month panel and order it by fund and calendar month ***/
proc import datafile="d:\research\networking\portfolio_sorting_work.csv"
    dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=50000;
run;

proc sort data=hf_performance_portfolio;
    by fund_id year month;
run;

/*** One- and two-row lags of diver_work_ratio within each fund.
     LAG reads from a queue that runs across fund boundaries, so the loop blanks
     the values that would come from the previous fund: both lags on a fund's
     first row, lag2 only on its second row, nothing from the third row on. ***/
data hf_performance_portfolio(drop=i count);
    set hf_performance_portfolio;
    by fund_id;
    array x(*) diver_work_ratio_lag1-diver_work_ratio_lag2;
    diver_work_ratio_lag1 = lag1(diver_work_ratio);
    diver_work_ratio_lag2 = lag2(diver_work_ratio);
    if first.fund_id then count = 1;
    do i = count to dim(x);
        x(i) = .;
    end;
    count + 1;   /* sum statement: count is retained across rows */
run;

proc sort data=hf_performance_portfolio;
    by year;
run;

/*** sample starts in 1997 ***/
data hf_performance_portfolio;
    set hf_performance_portfolio(where=(year > 1996));
run;

/*** Yearly tercile sort on the lagged ratio, using January rows that have a
     non-missing return_lead12 and a lagged ratio different from 0 and 1.
     PROC RANK numbers the groups 0, 1, 2 from lowest to highest. ***/
proc rank data=hf_performance_portfolio groups=3 out=hf_performance_portfolio1;
    where month = 1
      and return_lead12 ne .
      and diver_work_ratio_lag1 ne 1
      and diver_work_ratio_lag1 ne 0;
    by year;
    var diver_work_ratio_lag1;
    ranks diverwork_rank;
run;

/*** flip the scale: highest tercile -> 2, middle -> 3, lowest -> 4 ***/
data hf_performance_portfolio1(keep=fund_id year diverwork_rank1);
    set hf_performance_portfolio1;
    diverwork_rank1 = 4 - diverwork_rank;
run;

/*** carry the January rank to every month of the same fund and year ***/
proc sql;
    create table hf_performance_portfolio as
    select *
    from hf_performance_portfolio as a
         left join hf_performance_portfolio1 as b
         on a.fund_id = b.fund_id and a.year = b.year;
quit;

/*** lagged ratio of exactly 1 -> rank 1, exactly 0 -> rank 5;
     rows that still have no rank are discarded ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    if diver_work_ratio_lag1 = 1 then diverwork_rank1 = 1;
    else if diver_work_ratio_lag1 = 0 then diverwork_rank1 = 5;
    if diverwork_rank1 ne .;
run;

/*** rank1-rank5: one row per fund_id for each value of diverwork_rank1 ***/
proc sort data=hf_performance_portfolio(where=(diverwork_rank1=1)) out=rank1 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diverwork_rank1=2)) out=rank2 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diverwork_rank1=3)) out=rank3 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diverwork_rank1=4)) out=rank4 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diverwork_rank1=5)) out=rank5 nodupkey;
    by fund_id;
run;

/*** Add to every row the mean of return, alpha and _eq2_r2 over all rows with
     the same diverwork_rank1 / year / month (summary statistics are remerged
     onto the detail rows because of "select *"). ***/
proc sql;
    create table hf_performance_portfolio as
    select *,
           mean(return)  as meanreturn,
           mean(alpha)   as meanalpha,
           mean(_eq2_r2) as meanr2
    from hf_performance_portfolio
    group by diverwork_rank1, year, month;
quit;

/*** one row per rank / year / month ***/
proc sort data=hf_performance_portfolio nodupkey;
    by diverwork_rank1 year month;
run;

/*** excess return of the cell and a numeric yyyymm key ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    excessreturn = meanreturn - rf;
    date         = year*100 + month;
run;

/*** descriptive statistics and one-sample t tests, rank by rank ***/
proc means data=hf_performance_portfolio;
    by diverwork_rank1;
    var excessreturn meanr2;
run;

proc ttest data=hf_performance_portfolio;
    by diverwork_rank1;
    var excessreturn;
run;

/*** two-sample t tests, rank 1 against rank 5 ***/
proc ttest data=hf_performance_portfolio;
    where diverwork_rank1 in (1, 5);
    class diverwork_rank1;
    var excessreturn;
run;

proc ttest data=hf_performance_portfolio;
    where diverwork_rank1 in (1, 5);
    class diverwork_rank1;
    var meanalpha;
run;

/*** order by rank and write the rank / month series to disk ***/
proc sort data=hf_performance_portfolio;
    by diverwork_rank1;
run;

proc export data=hf_performance_portfolio
    outfile='d:\research\networking\hf_performance_portfolio10_work.csv'
    dbms=csv
    replace;
run;

/*** rank-1 series with its excess return renamed ***/
data hf_performance_portfolio_1;
    set hf_performance_portfolio(where=(diverwork_rank1=1));
    rename excessreturn=excessreturn1;
run;

/*** rank-5 series reduced to the excess return and the yyyymm key
     (KEEP= is applied before RENAME=, so the old name is listed) ***/
data hf_performance_portfolio_5(keep=excessreturn date
                                rename=(excessreturn=excessreturn5));
    set hf_performance_portfolio(where=(diverwork_rank1=5));
run;

/*** pair the two series month by month ***/
proc sql;
    create table hf_performance_portfolio_s as
    select *
    from hf_performance_portfolio_1 as p1
         inner join hf_performance_portfolio_5 as p5
         on p1.date = p5.date;
quit;

/*** rank 1 minus rank 5 ***/
data hf_performance_portfolio_s;
    set hf_performance_portfolio_s;
    excessreturn_s = excessreturn1 - excessreturn5;
run;

proc export data=hf_performance_portfolio_s
    outfile='d:\research\networking\hf_performance_portfolio_s_work.csv'
    dbms=csv
    replace;
run;

/*** origin ***/
/*** read the fund-month panel and order it by fund and calendar month ***/
proc import datafile="d:\research\networking\portfolio_sorting_origin.csv"
    dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=500000;
run;

proc sort data=hf_performance_portfolio;
    by fund_id year month;
run;

/*** One- and two-row lags of diver_origin_ratio within each fund.
     LAG reads from a queue that runs across fund boundaries, so the loop blanks
     the values that would come from the previous fund: both lags on a fund's
     first row, lag2 only on its second row, nothing from the third row on. ***/
data hf_performance_portfolio(drop=i count);
    set hf_performance_portfolio;
    by fund_id;
    array x(*) diver_origin_ratio_lag1-diver_origin_ratio_lag2;
    diver_origin_ratio_lag1 = lag1(diver_origin_ratio);
    diver_origin_ratio_lag2 = lag2(diver_origin_ratio);
    if first.fund_id then count = 1;
    do i = count to dim(x);
        x(i) = .;
    end;
    count + 1;   /* sum statement: count is retained across rows */
run;

proc sort data=hf_performance_portfolio;
    by year;
run;

/*** sample starts in 1997 ***/
data hf_performance_portfolio;
    set hf_performance_portfolio(where=(year > 1996));
run;

/*** Yearly tercile sort on the lagged ratio, using January rows whose lagged
     ratio is different from 0 and 1.  PROC RANK numbers the groups 0, 1, 2
     from lowest to highest. ***/
proc rank data=hf_performance_portfolio groups=3 out=hf_performance_portfolio1;
    where month = 1
      and diver_origin_ratio_lag1 ne 1
      and diver_origin_ratio_lag1 ne 0;
    by year;
    var diver_origin_ratio_lag1;
    ranks diverorigin_rank;
run;

/*** flip the scale: highest tercile -> 2, middle -> 3, lowest -> 4 ***/
data hf_performance_portfolio1(keep=fund_id year diverorigin_rank1);
    set hf_performance_portfolio1;
    diverorigin_rank1 = 4 - diverorigin_rank;
run;

/*** carry the January rank to every month of the same fund and year ***/
proc sql;
    create table hf_performance_portfolio as
    select *
    from hf_performance_portfolio as a
         left join hf_performance_portfolio1 as b
         on a.fund_id = b.fund_id and a.year = b.year;
quit;

/*** lagged ratio of exactly 1 -> rank 1, exactly 0 -> rank 5;
     rows that still have no rank are discarded ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    if diver_origin_ratio_lag1 = 1 then diverorigin_rank1 = 1;
    else if diver_origin_ratio_lag1 = 0 then diverorigin_rank1 = 5;
    if diverorigin_rank1 ne .;
run;

/*** rank1-rank5: one row per fund_id for each value of diverorigin_rank1 ***/
proc sort data=hf_performance_portfolio(where=(diverorigin_rank1=1)) out=rank1 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diverorigin_rank1=2)) out=rank2 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diverorigin_rank1=3)) out=rank3 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diverorigin_rank1=4)) out=rank4 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(diverorigin_rank1=5)) out=rank5 nodupkey;
    by fund_id;
run;

/*** Add to every row the mean of return, alpha and _eq2_r2 over all rows with
     the same diverorigin_rank1 / year / month (summary statistics are remerged
     onto the detail rows because of "select *"). ***/
proc sql;
    create table hf_performance_portfolio as
    select *,
           mean(return)  as meanreturn,
           mean(alpha)   as meanalpha,
           mean(_eq2_r2) as meanr2
    from hf_performance_portfolio
    group by diverorigin_rank1, year, month;
quit;

/*** one row per rank / year / month ***/
proc sort data=hf_performance_portfolio nodupkey;
    by diverorigin_rank1 year month;
run;

/*** excess return of the cell and a numeric yyyymm key ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    excessreturn = meanreturn - rf;
    date         = year*100 + month;
run;

/*** descriptive statistics and one-sample t tests, rank by rank ***/
proc means data=hf_performance_portfolio;
    by diverorigin_rank1;
    var excessreturn meanr2;
run;

proc ttest data=hf_performance_portfolio;
    by diverorigin_rank1;
    var excessreturn;
run;

/*** two-sample t tests, rank 1 against rank 5 ***/
proc ttest data=hf_performance_portfolio;
    where diverorigin_rank1 in (1, 5);
    class diverorigin_rank1;
    var excessreturn;
run;

proc ttest data=hf_performance_portfolio;
    where diverorigin_rank1 in (1, 5);
    class diverorigin_rank1;
    var meanalpha;
run;

/*** Order by rank and write the rank / month series to disk.
     The sort key is diverorigin_rank1, the rank variable this section builds;
     the previous key, homoorigin_rank1, is not created anywhere in this
     section, so the sort would stop with a "variable not found" error. ***/
proc sort data=hf_performance_portfolio;
    by diverorigin_rank1;
run;

proc export data=hf_performance_portfolio
    outfile='d:\research\networking\hf_performance_portfolio10_origin.csv'
    dbms=csv
    replace;
run;

/*** rank-1 series with its excess return renamed ***/
data hf_performance_portfolio_1;
    set hf_performance_portfolio;
    where diverorigin_rank1=1;
    rename excessreturn=excessreturn1;
run;

/*** rank-5 series reduced to the excess return and the yyyymm key
     (KEEP uses the pre-rename name) ***/
data hf_performance_portfolio_5;
    set hf_performance_portfolio;
    where diverorigin_rank1=5;
    rename excessreturn=excessreturn5;
    keep excessreturn date;
run;

/*** pair the two series month by month ***/
proc sql;
    create table hf_performance_portfolio_s as
    select *
    from hf_performance_portfolio_1 as p1
         inner join hf_performance_portfolio_5 as p5
         on p1.date = p5.date;
quit;

/*** rank 1 minus rank 5 ***/
data hf_performance_portfolio_s;
    set hf_performance_portfolio_s;
    excessreturn_s = excessreturn1 - excessreturn5;
run;

proc export data=hf_performance_portfolio_s
    outfile='d:\research\networking\hf_performance_portfolio_s_origin.csv'
    dbms=csv
    replace;
run;

/*** gender ***/
/*** read the fund-month panel and order it by fund and calendar month ***/
proc import datafile="d:\research\networking\portfolio_sorting_gender.csv"
    dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=500000;
run;

proc sort data=hf_performance_portfolio;
    by fund_id year month;
run;

/*** One- and two-row lags of diver_gender_ratio within each fund.
     LAG reads from a queue that runs across fund boundaries, so the loop blanks
     the values that would come from the previous fund: both lags on a fund's
     first row, lag2 only on its second row, nothing from the third row on. ***/
data hf_performance_portfolio(drop=i count);
    set hf_performance_portfolio;
    by fund_id;
    array x(*) diver_gender_ratio_lag1-diver_gender_ratio_lag2;
    diver_gender_ratio_lag1 = lag1(diver_gender_ratio);
    diver_gender_ratio_lag2 = lag2(diver_gender_ratio);
    if first.fund_id then count = 1;
    do i = count to dim(x);
        x(i) = .;
    end;
    count + 1;   /* sum statement: count is retained across rows */
run;

proc sort data=hf_performance_portfolio;
    by year;
run;

/*** sample starts in 1997 ***/
data hf_performance_portfolio;
    set hf_performance_portfolio(where=(year > 1996));
run;

/*** Yearly quartile sort on the lagged ratio, using January rows.
     The filter is on diver_gender_ratio_lag1, i.e. the same variable that is
     ranked and that later sends zero-ratio funds to rank 5.  Filtering on the
     unlagged diver_gender_ratio instead would let lag=0 funds distort the
     quartile breakpoints and would leave funds with current ratio 0 but a
     non-zero lag without any rank, so they would be dropped below.
     PROC RANK numbers the groups 0..3 from lowest to highest. ***/
proc rank groups=4 data=hf_performance_portfolio out=hf_performance_portfolio1;
    var diver_gender_ratio_lag1;
    ranks divergender_rank;
    where month=1 and diver_gender_ratio_lag1 not=0;
    by year;
run;

/*** flip the scale: highest quartile -> 1, ..., lowest quartile -> 4 ***/
data hf_performance_portfolio1;
    set hf_performance_portfolio1;
    divergender_rank1 = 4 - divergender_rank;
    keep fund_id year divergender_rank1;
run;

/*** carry the January rank to every month of the same fund and year ***/
proc sql;
    create table hf_performance_portfolio as
    select *
    from hf_performance_portfolio as a
         left join hf_performance_portfolio1 as b
         on a.fund_id = b.fund_id and a.year = b.year;
quit;

/*** lagged ratio of exactly 0 -> rank 5 ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    if diver_gender_ratio_lag1=0 then divergender_rank1=5;
run;

/*** rows that still have no rank are discarded ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    where divergender_rank1 not=.;
run;

/*** rank1-rank5: one row per fund_id for each value of divergender_rank1 ***/
data rank1; set hf_performance_portfolio; where divergender_rank1=1; run;
proc sort data=rank1 nodupkeys; by fund_id; run;
data rank2; set hf_performance_portfolio; where divergender_rank1=2; run;
proc sort data=rank2 nodupkeys; by fund_id; run;
data rank3; set hf_performance_portfolio; where divergender_rank1=3; run;
proc sort data=rank3 nodupkeys; by fund_id; run;
data rank4; set hf_performance_portfolio; where divergender_rank1=4; run;
proc sort data=rank4 nodupkeys; by fund_id; run;
data rank5; set hf_performance_portfolio; where divergender_rank1=5; run;
proc sort data=rank5 nodupkeys; by fund_id; run;

/*** Add to every row the mean of return, alpha and _eq2_r2 over all rows with
     the same divergender_rank1 / year / month (summary statistics are remerged
     onto the detail rows because of "select *"). ***/
proc sql;
    create table hf_performance_portfolio as
    select *,
           mean(return)  as meanreturn,
           mean(alpha)   as meanalpha,
           mean(_eq2_r2) as meanr2
    from hf_performance_portfolio
    group by divergender_rank1, year, month;
quit;

/*** one row per rank / year / month ***/
proc sort data=hf_performance_portfolio nodupkey;
    by divergender_rank1 year month;
run;

/*** excess return of the cell and a numeric yyyymm key ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    excessreturn = meanreturn - rf;
    date         = year*100 + month;
run;

/*** descriptive statistics and one-sample t tests, rank by rank ***/
proc means data=hf_performance_portfolio;
    by divergender_rank1;
    var excessreturn meanr2;
run;

proc ttest data=hf_performance_portfolio;
    by divergender_rank1;
    var excessreturn;
run;

/*** two-sample t tests, rank 1 against rank 5 ***/
proc ttest data=hf_performance_portfolio;
    where divergender_rank1 in (1, 5);
    class divergender_rank1;
    var excessreturn meanalpha;
run;

/*** order by rank and write the rank / month series to disk ***/
proc sort data=hf_performance_portfolio;
    by divergender_rank1;
run;

proc export data=hf_performance_portfolio
    outfile='d:\research\networking\hf_performance_portfolio10_gender.csv'
    dbms=csv
    replace;
run;

/*** rank-1 series with its excess return renamed ***/
data hf_performance_portfolio_1;
    set hf_performance_portfolio(where=(divergender_rank1=1));
    rename excessreturn=excessreturn1;
run;

/*** rank-5 series reduced to the excess return and the yyyymm key
     (KEEP= is applied before RENAME=, so the old name is listed) ***/
data hf_performance_portfolio_5(keep=excessreturn date
                                rename=(excessreturn=excessreturn5));
    set hf_performance_portfolio(where=(divergender_rank1=5));
run;

/*** pair the two series month by month ***/
proc sql;
    create table hf_performance_portfolio_s as
    select *
    from hf_performance_portfolio_1 as p1
         inner join hf_performance_portfolio_5 as p5
         on p1.date = p5.date;
quit;

/*** rank 1 minus rank 5 ***/
data hf_performance_portfolio_s;
    set hf_performance_portfolio_s;
    excessreturn_s = excessreturn1 - excessreturn5;
run;

proc export data=hf_performance_portfolio_s
    outfile='d:\research\networking\hf_performance_portfolio_s_gender.csv'
    dbms=csv
    replace;
run;


/*** eth ***/
/*** read the fund-month panel and order it by fund and calendar month ***/
proc import datafile="d:\research\networking\portfolio_sorting_eth.csv"
    dbms=CSV out=hf_performance_portfolio replace;
    getnames=yes;
    guessingrows=500000;
run;

proc sort data=hf_performance_portfolio;
    by fund_id year month;
run;

/*** One- and two-row lags of diver_eth_ratio within each fund.
     LAG reads from a queue that runs across fund boundaries, so the loop blanks
     the values that would come from the previous fund: both lags on a fund's
     first row, lag2 only on its second row, nothing from the third row on. ***/
data hf_performance_portfolio(drop=i count);
    set hf_performance_portfolio;
    by fund_id;
    array x(*) diver_eth_ratio_lag1-diver_eth_ratio_lag2;
    diver_eth_ratio_lag1 = lag1(diver_eth_ratio);
    diver_eth_ratio_lag2 = lag2(diver_eth_ratio);
    if first.fund_id then count = 1;
    do i = count to dim(x);
        x(i) = .;
    end;
    count + 1;   /* sum statement: count is retained across rows */
run;

proc sort data=hf_performance_portfolio;
    by year;
run;

/*** sample starts in 1997 ***/
data hf_performance_portfolio;
    set hf_performance_portfolio(where=(year > 1996));
run;

/*** Yearly tercile sort on the lagged ratio, using January rows whose lagged
     ratio is different from 0 and 1; tied values receive the lowest rank of
     the tie (ties=low).  PROC RANK numbers the groups 0, 1, 2 from lowest to
     highest. ***/
proc rank data=hf_performance_portfolio groups=3 ties=low
          out=hf_performance_portfolio1;
    where month = 1
      and diver_eth_ratio_lag1 ne 1
      and diver_eth_ratio_lag1 ne 0;
    by year;
    var diver_eth_ratio_lag1;
    ranks divereth_rank;
run;

/*** flip the scale: highest tercile -> 2, middle -> 3, lowest -> 4 ***/
data hf_performance_portfolio1(keep=fund_id year divereth_rank1);
    set hf_performance_portfolio1;
    divereth_rank1 = 4 - divereth_rank;
run;

/*** carry the January rank to every month of the same fund and year ***/
proc sql;
    create table hf_performance_portfolio as
    select *
    from hf_performance_portfolio as a
         left join hf_performance_portfolio1 as b
         on a.fund_id = b.fund_id and a.year = b.year;
quit;

/*** lagged ratio of exactly 1 -> rank 1, exactly 0 -> rank 5;
     rows that still have no rank are discarded ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    if diver_eth_ratio_lag1 = 1 then divereth_rank1 = 1;
    else if diver_eth_ratio_lag1 = 0 then divereth_rank1 = 5;
    if divereth_rank1 ne .;
run;

/*** rank1-rank5: one row per fund_id for each value of divereth_rank1 ***/
proc sort data=hf_performance_portfolio(where=(divereth_rank1=1)) out=rank1 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(divereth_rank1=2)) out=rank2 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(divereth_rank1=3)) out=rank3 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(divereth_rank1=4)) out=rank4 nodupkey;
    by fund_id;
run;
proc sort data=hf_performance_portfolio(where=(divereth_rank1=5)) out=rank5 nodupkey;
    by fund_id;
run;

/*** Add to every row the mean of return, alpha and _eq2_r2 over all rows with
     the same divereth_rank1 / year / month (summary statistics are remerged
     onto the detail rows because of "select *"). ***/
proc sql;
    create table hf_performance_portfolio as
    select *,
           mean(return)  as meanreturn,
           mean(alpha)   as meanalpha,
           mean(_eq2_r2) as meanr2
    from hf_performance_portfolio
    group by divereth_rank1, year, month;
quit;

/*** one row per rank / year / month ***/
proc sort data=hf_performance_portfolio nodupkey;
    by divereth_rank1 year month;
run;

/*** excess return of the cell and a numeric yyyymm key ***/
data hf_performance_portfolio;
    set hf_performance_portfolio;
    excessreturn = meanreturn - rf;
    date         = year*100 + month;
run;

/*** descriptive statistics and one-sample t tests, rank by rank ***/
proc means data=hf_performance_portfolio;
    by divereth_rank1;
    var excessreturn meanr2;
run;

proc ttest data=hf_performance_portfolio;
    by divereth_rank1;
    var excessreturn;
run;

/*** two-sample t tests, rank 1 against rank 5 ***/
proc ttest data=hf_performance_portfolio;
    where divereth_rank1 in (1, 5);
    class divereth_rank1;
    var excessreturn meanalpha;
run;

/*** order by rank and write the rank / month series to disk ***/
proc sort data=hf_performance_portfolio;
    by divereth_rank1;
run;

proc export data=hf_performance_portfolio
    outfile='d:\research\networking\hf_performance_portfolio10_eth.csv'
    dbms=csv
    replace;
run;

/*** rank-1 series with its excess return renamed ***/
data hf_performance_portfolio_1;
    set hf_performance_portfolio(where=(divereth_rank1=1));
    rename excessreturn=excessreturn1;
run;

/*** rank-5 series reduced to the excess return and the yyyymm key
     (KEEP= is applied before RENAME=, so the old name is listed) ***/
data hf_performance_portfolio_5(keep=excessreturn date
                                rename=(excessreturn=excessreturn5));
    set hf_performance_portfolio(where=(divereth_rank1=5));
run;

/*** pair the two series month by month ***/
proc sql;
    create table hf_performance_portfolio_s as
    select *
    from hf_performance_portfolio_1 as p1
         inner join hf_performance_portfolio_5 as p5
         on p1.date = p5.date;
quit;

/*** rank 1 minus rank 5 ***/
data hf_performance_portfolio_s;
    set hf_performance_portfolio_s;
    excessreturn_s = excessreturn1 - excessreturn5;
run;

proc export data=hf_performance_portfolio_s
    outfile='d:\research\networking\hf_performance_portfolio_s_eth.csv'
    dbms=csv
    replace;
run;

/*** MF summary ***/
proc import datafile="d:\research\networking\summary.csv"
    dbms=CSV out=summary_mf replace;
    getnames=yes;
    guessingrows=50000;
run;

/*** delete index funds (blank index_fund_flag is kept) and rows with no
     manager name ***/
data summary_mf;
    set summary_mf;
    where index_fund_flag = '' and mgr_name ne '';
run;

/*** one row per crsp_fundno (first row in input order is retained) ***/
proc sort data=summary_mf nodupkey;
    by crsp_fundno;
run;

/*** Drop 'Team Managed' rows, split mgr_name on '/' into up to eight
     manager_name fields, and keep only rows with at least two names. ***/
data summary_mf;
    set summary_mf(where=(mgr_name ne 'Team Managed'));
    manager_name1 = scan(mgr_name, 1, '/');
    manager_name2 = scan(mgr_name, 2, '/');
    manager_name3 = scan(mgr_name, 3, '/');
    manager_name4 = scan(mgr_name, 4, '/');
    manager_name5 = scan(mgr_name, 5, '/');
    manager_name6 = scan(mgr_name, 6, '/');
    manager_name7 = scan(mgr_name, 7, '/');
    manager_name8 = scan(mgr_name, 8, '/');
    if manager_name2 ne '';
run;
/*** check how many unique teams ***/
proc sort data=summary_mf out=team nodupkey;
    by mgmt_name crsp_fundno mgr_name;
run;

/*** keep teams whose first listed name has more than one word, and reduce the
     table to the identifiers plus the eight name fields ***/
data team(keep=mgmt_name crsp_fundno fund_name manager_name1-manager_name8);
    set team;
    if countw(manager_name1) > 1;
run;

proc sort data=team;
    by mgmt_name crsp_fundno fund_name;
run;

/*** wide -> long: one row per fund and name slot (value lands in col1) ***/
proc transpose data=team out=team1;
    by mgmt_name crsp_fundno fund_name;
    var manager_name1-manager_name8;
run;

/*** drop empty slots and name the value column manager_name
     (KEEP= is applied before RENAME=, so col1 is listed) ***/
data team1(keep=mgmt_name crsp_fundno fund_name col1
           rename=(col1=manager_name));
    set team1(where=(col1 ne ""));
run;

proc export data=team1
    outfile='d:\research\networking\summary_manager.csv'
    dbms=csv
    replace;
run;




/*** One row per distinct manager_name.  The input is named explicitly instead
     of relying on _LAST_, which only pointed at team1 because no step since
     its creation had written another data set. ***/
proc sort data=team1 nodupkeys;
    by manager_name;
run;

/*** keep names made of more than one word ***/
data team1;
    set team1;
    countwords = countw(manager_name);
run;
data team1;
    set team1;
    where countwords > 1;
run;

/*** First blank-delimited word of the name.  The delimiter is given as the
     character ' '; a numeric 1 in that position is implicitly converted to a
     blank-padded string, which made both blank and the digit 1 delimiters. ***/
data team1;
    set team1;
    first = scan(manager_name, 1, ' ');
run;

/*** delete names whose first word is a single capital letter ***/
data team1;
    set team1;
    if first in ("A","B","C","D","E","F","G","H","I","J","K","L","M",
                 "N","O","P","Q","R","S","T","U","V","W","X","Y","Z") then delete;
run;

data team1;
    set team1;
    keep crsp_fundno fund_name manager_name;
run;

proc export data=team1
    outfile='d:\research\networking\manager_mf.csv'
    dbms=csv
    replace;
run;
/*** fund list and hand-checked manager names ***/
proc import datafile="d:\research\networking\fund_list.csv" dbms=CSV out=fund_list replace;
    getnames=yes;
    guessingrows=500;
run;
proc import datafile="d:\research\networking\manager_mf_name.csv" dbms=CSV out=manager_last replace;
    getnames=yes;
    guessingrows=500;
run;

/*** Last and first blank-delimited word of manager_name.  The delimiter is
     given as the character ' '; a numeric 1 in that position is implicitly
     converted to a blank-padded string, which made both blank and the digit 1
     delimiters. ***/
data manager_last;
    set manager_last;
    last  = scan(manager_name, -1, ' ');
    first = scan(manager_name, 1, ' ');
run;

proc export data=manager_last
    outfile='d:\research\networking\manager_last.csv'
    dbms=csv
    replace;
run;
